Re: [PATCH v2 bpf-next 12/20] libbpf: Add support for bpf_arena.

Andrii Nakryiko <andrii.nakryiko@xxxxxxxxx> · Tue, 13 Feb 2024 15:15:17 -0800

On Thu, Feb 8, 2024 at 8:07 PM Alexei Starovoitov
<alexei.starovoitov@xxxxxxxxx> wrote:
>
> From: Alexei Starovoitov <ast@xxxxxxxxxx>
>
> mmap() bpf_arena right after creation, since the kernel needs to
> remember the address returned from mmap. This is user_vm_start.
> LLVM will generate bpf_arena_cast_user() instructions where
> necessary and JIT will add upper 32-bit of user_vm_start
> to such pointers.
>
> Fix up bpf_map_mmap_sz() to compute mmap size as
> map->value_size * map->max_entries for arrays and
> PAGE_SIZE * map->max_entries for arena.
>
> Don't set BTF at arena creation time, since it doesn't support it.
>
> Signed-off-by: Alexei Starovoitov <ast@xxxxxxxxxx>
> ---
>  tools/lib/bpf/libbpf.c        | 43 ++++++++++++++++++++++++++++++-----
>  tools/lib/bpf/libbpf_probes.c |  7 ++++++
>  2 files changed, 44 insertions(+), 6 deletions(-)
>
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index 01f407591a92..4880d623098d 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -185,6 +185,7 @@ static const char * const map_type_name[] = {
>         [BPF_MAP_TYPE_BLOOM_FILTER]             = "bloom_filter",
>         [BPF_MAP_TYPE_USER_RINGBUF]             = "user_ringbuf",
>         [BPF_MAP_TYPE_CGRP_STORAGE]             = "cgrp_storage",
> +       [BPF_MAP_TYPE_ARENA]                    = "arena",
>  };
>
>  static const char * const prog_type_name[] = {
> @@ -1577,7 +1578,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
>         return map;
>  }
>
> -static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
> +static size_t __bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)

please rename this to array_map_mmap_sz, underscores are not very meaningful

>  {
>         const long page_sz = sysconf(_SC_PAGE_SIZE);
>         size_t map_sz;
> @@ -1587,6 +1588,20 @@ static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
>         return map_sz;
>  }
>
> +static size_t bpf_map_mmap_sz(const struct bpf_map *map)
> +{
> +       const long page_sz = sysconf(_SC_PAGE_SIZE);
> +
> +       switch (map->def.type) {
> +       case BPF_MAP_TYPE_ARRAY:
> +               return __bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
> +       case BPF_MAP_TYPE_ARENA:
> +               return page_sz * map->def.max_entries;
> +       default:
> +               return 0; /* not supported */
> +       }
> +}
> +
>  static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz)
>  {
>         void *mmaped;
> @@ -1740,7 +1755,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
>         pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
>                  map->name, map->sec_idx, map->sec_offset, def->map_flags);
>
> -       mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
> +       mmap_sz = bpf_map_mmap_sz(map);
>         map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
>                            MAP_SHARED | MAP_ANONYMOUS, -1, 0);
>         if (map->mmaped == MAP_FAILED) {
> @@ -4852,6 +4867,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
>         case BPF_MAP_TYPE_SOCKHASH:
>         case BPF_MAP_TYPE_QUEUE:
>         case BPF_MAP_TYPE_STACK:
> +       case BPF_MAP_TYPE_ARENA:
>                 create_attr.btf_fd = 0;
>                 create_attr.btf_key_type_id = 0;
>                 create_attr.btf_value_type_id = 0;
> @@ -4908,6 +4924,21 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
>         if (map->fd == map_fd)
>                 return 0;
>
> +       if (def->type == BPF_MAP_TYPE_ARENA) {
> +               map->mmaped = mmap((void *)map->map_extra, bpf_map_mmap_sz(map),
> +                                  PROT_READ | PROT_WRITE,
> +                                  map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED,
> +                                  map_fd, 0);
> +               if (map->mmaped == MAP_FAILED) {
> +                       err = -errno;
> +                       map->mmaped = NULL;
> +                       close(map_fd);
> +                       pr_warn("map '%s': failed to mmap bpf_arena: %d\n",
> +                               bpf_map__name(map), err);

seems like we just use `map->name` directly elsewhere in this
function, let's keep it consistent

> +                       return err;
> +               }
> +       }
> +
>         /* Keep placeholder FD value but now point it to the BPF map object.
>          * This way everything that relied on this map's FD (e.g., relocated
>          * ldimm64 instructions) will stay valid and won't need adjustments.
> @@ -8582,7 +8613,7 @@ static void bpf_map__destroy(struct bpf_map *map)
>         if (map->mmaped) {
>                 size_t mmap_sz;
>
> -               mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
> +               mmap_sz = bpf_map_mmap_sz(map);
>                 munmap(map->mmaped, mmap_sz);
>                 map->mmaped = NULL;
>         }
> @@ -9830,8 +9861,8 @@ int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
>                 int err;
>                 size_t mmap_old_sz, mmap_new_sz;
>

this logic assumes ARRAY (which are the only ones so far that could
have `map->mapped != NULL`, so I think we should error out for ARENA
maps here, instead of silently doing the wrong thing?

if (map->type != BPF_MAP_TYPE_ARRAY)
    return -EOPNOTSUPP;

should do

> -               mmap_old_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
> -               mmap_new_sz = bpf_map_mmap_sz(size, map->def.max_entries);
> +               mmap_old_sz = bpf_map_mmap_sz(map);
> +               mmap_new_sz = __bpf_map_mmap_sz(size, map->def.max_entries);
>                 err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz);
>                 if (err) {
>                         pr_warn("map '%s': failed to resize memory-mapped region: %d\n",
> @@ -13356,7 +13387,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
>
>         for (i = 0; i < s->map_cnt; i++) {
>                 struct bpf_map *map = *s->maps[i].map;
> -               size_t mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
> +               size_t mmap_sz = bpf_map_mmap_sz(map);
>                 int prot, map_fd = map->fd;
>                 void **mmaped = s->maps[i].mmaped;
>
> diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
> index ee9b1dbea9eb..302188122439 100644
> --- a/tools/lib/bpf/libbpf_probes.c
> +++ b/tools/lib/bpf/libbpf_probes.c
> @@ -338,6 +338,13 @@ static int probe_map_create(enum bpf_map_type map_type)
>                 key_size = 0;
>                 max_entries = 1;
>                 break;
> +       case BPF_MAP_TYPE_ARENA:
> +               key_size        = 0;
> +               value_size      = 0;
> +               max_entries     = 1; /* one page */
> +               opts.map_extra  = 0; /* can mmap() at any address */
> +               opts.map_flags  = BPF_F_MMAPABLE;
> +               break;
>         case BPF_MAP_TYPE_HASH:
>         case BPF_MAP_TYPE_ARRAY:
>         case BPF_MAP_TYPE_PROG_ARRAY:
> --
> 2.34.1
>