Re: [PATCH bpf-next 2/4] bpf, libbpf: Support global percpu data

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Jan 27, 2025 at 8:22 AM Leon Hwang <leon.hwang@xxxxxxxxx> wrote:
>
> This patch introduces support for global percpu data in libbpf. A new
> section named ".percpu" is added, similar to the existing ".data" section.
> Internal maps are created for ".percpu" sections, which are then
> initialized and populated accordingly.
>
> The changes include:
>
> * Introduction of the ".percpu" section in libbpf.
> * Creation of internal maps for percpu data.
> * Initialization and population of these maps.
>
> This enhancement allows BPF programs to efficiently manage and access
> percpu global data, improving performance for use cases that require
> percpu buffer.
>
> Signed-off-by: Leon Hwang <leon.hwang@xxxxxxxxx>
> ---
>  tools/lib/bpf/libbpf.c | 172 ++++++++++++++++++++++++++++++++---------
>  1 file changed, 135 insertions(+), 37 deletions(-)
>
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index 194809da51725..6da6004c5c84d 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -516,6 +516,7 @@ struct bpf_struct_ops {
>  };
>
>  #define DATA_SEC ".data"
> +#define PERCPU_DATA_SEC ".percpu"
>  #define BSS_SEC ".bss"
>  #define RODATA_SEC ".rodata"
>  #define KCONFIG_SEC ".kconfig"
> @@ -530,6 +531,7 @@ enum libbpf_map_type {
>         LIBBPF_MAP_BSS,
>         LIBBPF_MAP_RODATA,
>         LIBBPF_MAP_KCONFIG,
> +       LIBBPF_MAP_PERCPU_DATA,

nit: let's keep it shorter: LIBBPF_MAP_PERCPU

>  };
>
>  struct bpf_map_def {
> @@ -562,6 +564,7 @@ struct bpf_map {
>         __u32 btf_value_type_id;
>         __u32 btf_vmlinux_value_type_id;
>         enum libbpf_map_type libbpf_type;
> +       void *data;
>         void *mmaped;
>         struct bpf_struct_ops *st_ops;
>         struct bpf_map *inner_map;
> @@ -640,6 +643,7 @@ enum sec_type {
>         SEC_DATA,
>         SEC_RODATA,
>         SEC_ST_OPS,
> +       SEC_PERCPU_DATA,

ditto, just SEC_PERCPU?

>  };
>
>  struct elf_sec_desc {
> @@ -1923,13 +1927,24 @@ static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map)
>         return false;
>  }
>
> +static void map_copy_data(struct bpf_map *map, const void *data)
> +{
> +       bool is_percpu_data = map->libbpf_type == LIBBPF_MAP_PERCPU_DATA;
> +       size_t data_sz = map->def.value_size;
> +
> +       if (data)
> +               memcpy(is_percpu_data ? map->data : map->mmaped, data, data_sz);
> +}
> +
>  static int
>  bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
>                               const char *real_name, int sec_idx, void *data, size_t data_sz)
>  {
> +       bool is_percpu_data = type == LIBBPF_MAP_PERCPU_DATA;
>         struct bpf_map_def *def;
>         struct bpf_map *map;
>         size_t mmap_sz;
> +       size_t elem_sz;

nit: just:

size_t mmap_sz, elem_sz;

>         int err;
>
>         map = bpf_object__add_map(obj);
> @@ -1948,7 +1963,8 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
>         }
>
>         def = &map->def;
> -       def->type = BPF_MAP_TYPE_ARRAY;
> +       def->type = is_percpu_data ? BPF_MAP_TYPE_PERCPU_ARRAY
> +                                  : BPF_MAP_TYPE_ARRAY;

nit: single line

>         def->key_size = sizeof(int);
>         def->value_size = data_sz;
>         def->max_entries = 1;

[...]

> +               if (map_is_mmapable(obj, map))
> +                       def->map_flags |= BPF_F_MMAPABLE;
> +
> +               mmap_sz = bpf_map_mmap_sz(map);
> +               map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
> +                                  MAP_SHARED | MAP_ANONYMOUS, -1, 0);
> +               if (map->mmaped == MAP_FAILED) {
> +                       err = -errno;
> +                       map->mmaped = NULL;
> +                       pr_warn("map '%s': failed to alloc content buffer: %s\n",
> +                               map->name, errstr(err));
> +                       goto free_name;
> +               }
> +
> +               map_copy_data(map, data);

why not memcpy() here? you know it's not percpu map, so why obscuring
that memcpy?


> +       }
>
>         pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
>         return 0;

[...]

> @@ -5125,23 +5180,54 @@ static int
>  bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
>  {
>         enum libbpf_map_type map_type = map->libbpf_type;
> -       int err, zero = 0;
> +       bool is_percpu_data = map_type == LIBBPF_MAP_PERCPU_DATA;
> +       int err = 0, zero = 0;
> +       void *data = NULL;
> +       int num_cpus, i;
> +       size_t data_sz;
> +       size_t elem_sz;
>         size_t mmap_sz;

nit: keep those size_t variables grouped: `size_t mmap_sz, data_sz, elem_sz;`

>
> +       data_sz = map->def.value_size;
> +       if (is_percpu_data) {
> +               num_cpus = libbpf_num_possible_cpus();
> +               if (num_cpus < 0) {
> +                       err = libbpf_err_errno(num_cpus);

why? num_cpus *IS* error code if num_cpus < 0

> +                       pr_warn("map '%s': failed to get num_cpus: %s\n",
> +                               bpf_map__name(map), errstr(err));

this is unlikely to happen, I'd drop pr_warn()

> +                       return err;
> +               }
> +
> +               elem_sz = roundup(data_sz, 8);
> +               data_sz = elem_sz * num_cpus;
> +               data = malloc(data_sz);
> +               if (!data) {
> +                       err = -ENOMEM;
> +                       pr_warn("map '%s': failed to malloc memory: %s\n",
> +                               bpf_map__name(map), errstr(err));

-ENOMEM is rather self-descriptive (and generally not expected), so
don't add pr_warn() for such cases

> +                       return err;
> +               }
> +
> +               for (i = 0; i < num_cpus; i++)
> +                       memcpy(data + i * elem_sz, map->data, elem_sz);
> +       } else {
> +               data = map->mmaped;
> +       }
> +

[...]

>  static void bpf_map__destroy(struct bpf_map *map);
> @@ -8120,7 +8209,9 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj)
>         struct bpf_map *m;
>
>         bpf_object__for_each_map(m, obj) {
> -               if (!bpf_map__is_internal(m))
> +               if (!bpf_map__is_internal(m) ||
> +                   /* percpu data map is internal and not-mmapable. */
> +                   m->libbpf_type == LIBBPF_MAP_PERCPU_DATA)

original logic would work anyways, no? let's not add unnecessary
special casing here

>                         continue;
>                 if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
>                         m->def.map_flags &= ~BPF_F_MMAPABLE;
> @@ -9041,6 +9132,8 @@ static void bpf_map__destroy(struct bpf_map *map)
>         if (map->mmaped && map->mmaped != map->obj->arena_data)
>                 munmap(map->mmaped, bpf_map_mmap_sz(map));
>         map->mmaped = NULL;
> +       if (map->data)
> +               zfree(&map->data);
>

this whole map->mmaped and map->data duality and duplication seems not
worth it, tbh. Maybe we should keep using map->mmaped (we probably
could name it more generically at some point, but I don't want to
start bike shedding now) even for malloc'ed memory? After all, we
already have ARENA as another special case? WDYT, can your changes be
implemented by reusing map->mmaped, taking into account a type of map?

pw-bot: cr

>         if (map->st_ops) {
>                 zfree(&map->st_ops->data);
> @@ -10132,14 +10225,18 @@ int bpf_map__fd(const struct bpf_map *map)
>
>  static bool map_uses_real_name(const struct bpf_map *map)
>  {
> -       /* Since libbpf started to support custom .data.* and .rodata.* maps,
> -        * their user-visible name differs from kernel-visible name. Users see
> -        * such map's corresponding ELF section name as a map name.
> -        * This check distinguishes .data/.rodata from .data.* and .rodata.*
> -        * maps to know which name has to be returned to the user.
> +       /* Since libbpf started to support custom .data.*, .percpu.* and
> +        * .rodata.* maps, their user-visible name differs from kernel-visible
> +        * name. Users see such map's corresponding ELF section name as a map
> +        * name. This check distinguishes .data/.percpu/.rodata from .data.*,
> +        * .percpu.* and .rodata.* maps to know which name has to be returned to
> +        * the user.
>          */
>         if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0)
>                 return true;
> +       if (map->libbpf_type == LIBBPF_MAP_PERCPU_DATA &&
> +           strcmp(map->real_name, PERCPU_DATA_SEC) != 0)
> +               return true;

nit: shorten LIBBPF_MAP_PERCPU_DATA and keep single line, please

>         if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0)
>                 return true;
>         return false;
> @@ -10348,7 +10445,8 @@ int bpf_map__set_initial_value(struct bpf_map *map,
>         if (map->obj->loaded || map->reused)
>                 return libbpf_err(-EBUSY);
>
> -       if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG)
> +       if ((!map->mmaped && !map->data) ||
> +           map->libbpf_type == LIBBPF_MAP_KCONFIG)
>                 return libbpf_err(-EINVAL);
>
>         if (map->def.type == BPF_MAP_TYPE_ARENA)
> @@ -10358,7 +10456,7 @@ int bpf_map__set_initial_value(struct bpf_map *map,
>         if (size != actual_sz)
>                 return libbpf_err(-EINVAL);
>
> -       memcpy(map->mmaped, data, size);
> +       map_copy_data(map, data);
>         return 0;
>  }
>
> @@ -10370,7 +10468,7 @@ void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize)
>                 return map->st_ops->data;
>         }
>
> -       if (!map->mmaped)
> +       if (!map->mmaped && !map->data)
>                 return NULL;
>
>         if (map->def.type == BPF_MAP_TYPE_ARENA)
> @@ -10378,7 +10476,7 @@ void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize)
>         else
>                 *psize = map->def.value_size;
>
> -       return map->mmaped;
> +       return map->libbpf_type == LIBBPF_MAP_PERCPU_DATA ? map->data : map->mmaped;

Good chunk of changes like this wouldn't be necessary if we just reuse
map->mmaped.


>  }
>
>  bool bpf_map__is_internal(const struct bpf_map *map)
> --
> 2.47.1
>





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux