On Fri, Feb 9, 2024 at 11:17 PM Kumar Kartikeya Dwivedi <memxor@xxxxxxxxx> wrote: > > On Fri, 9 Feb 2024 at 05:07, Alexei Starovoitov > <alexei.starovoitov@xxxxxxxxx> wrote: > > > > From: Alexei Starovoitov <ast@xxxxxxxxxx> > > > > mmap() bpf_arena right after creation, since the kernel needs to > > remember the address returned from mmap. This is user_vm_start. > > LLVM will generate bpf_arena_cast_user() instructions where > > necessary and JIT will add upper 32-bit of user_vm_start > > to such pointers. > > > > Fix up bpf_map_mmap_sz() to compute mmap size as > > map->value_size * map->max_entries for arrays and > > PAGE_SIZE * map->max_entries for arena. > > > > Don't set BTF at arena creation time, since it doesn't support it. > > > > Signed-off-by: Alexei Starovoitov <ast@xxxxxxxxxx> > > --- > > tools/lib/bpf/libbpf.c | 43 ++++++++++++++++++++++++++++++----- > > tools/lib/bpf/libbpf_probes.c | 7 ++++++ > > 2 files changed, 44 insertions(+), 6 deletions(-) > > > > diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c > > index 01f407591a92..4880d623098d 100644 > > --- a/tools/lib/bpf/libbpf.c > > +++ b/tools/lib/bpf/libbpf.c > > @@ -185,6 +185,7 @@ static const char * const map_type_name[] = { > > [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter", > > [BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf", > > [BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage", > > + [BPF_MAP_TYPE_ARENA] = "arena", > > }; > > > > static const char * const prog_type_name[] = { > > @@ -1577,7 +1578,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) > > return map; > > } > > > > -static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries) > > +static size_t __bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries) > > { > > const long page_sz = sysconf(_SC_PAGE_SIZE); > > size_t map_sz; > > @@ -1587,6 +1588,20 @@ static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries) > > return map_sz; > > } > > > > +static size_t 
bpf_map_mmap_sz(const struct bpf_map *map) > > +{ > > + const long page_sz = sysconf(_SC_PAGE_SIZE); > > + > > + switch (map->def.type) { > > + case BPF_MAP_TYPE_ARRAY: > > + return __bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); > > + case BPF_MAP_TYPE_ARENA: > > + return page_sz * map->def.max_entries; > > + default: > > + return 0; /* not supported */ > > + } > > +} > > + > > static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz) > > { > > void *mmaped; > > @@ -1740,7 +1755,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, > > pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", > > map->name, map->sec_idx, map->sec_offset, def->map_flags); > > > > - mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); > > + mmap_sz = bpf_map_mmap_sz(map); > > map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, > > MAP_SHARED | MAP_ANONYMOUS, -1, 0); > > if (map->mmaped == MAP_FAILED) { > > @@ -4852,6 +4867,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b > > case BPF_MAP_TYPE_SOCKHASH: > > case BPF_MAP_TYPE_QUEUE: > > case BPF_MAP_TYPE_STACK: > > + case BPF_MAP_TYPE_ARENA: > > create_attr.btf_fd = 0; > > create_attr.btf_key_type_id = 0; > > create_attr.btf_value_type_id = 0; > > @@ -4908,6 +4924,21 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b > > if (map->fd == map_fd) > > return 0; > > > > + if (def->type == BPF_MAP_TYPE_ARENA) { > > + map->mmaped = mmap((void *)map->map_extra, bpf_map_mmap_sz(map), > > + PROT_READ | PROT_WRITE, > > + map->map_extra ? 
MAP_SHARED | MAP_FIXED : MAP_SHARED, > > + map_fd, 0); > > + if (map->mmaped == MAP_FAILED) { > > + err = -errno; > > + map->mmaped = NULL; > > + close(map_fd); > > + pr_warn("map '%s': failed to mmap bpf_arena: %d\n", > > + bpf_map__name(map), err); > > + return err; > > + } > > + } > > + > > Would it be possible to introduce a public API accessor for getting > the value of map->mmaped? That would be bpf_map__initial_value(), no? > Otherwise one would have to parse through /proc/self/maps in case > map_extra is 0. > > The use case is to be able to use the arena as a backing store for > userspace malloc arenas, so that > we can pass through malloc/mallocx calls (or class-specific operator > new) directly to the malloc arena using the BPF arena. > In such a case a lot of the burden of converting existing data > structures or code can be avoided by making much of the process > transparent. > Userspace malloc'ed objects can also be easily shared to BPF progs as a > pool through a bpf_ma-style per-CPU allocator. > > > [...]