This patch introduces support for global percpu data in libbpf by adding a new ".percpu" section, similar to ".data". It enables efficient handling of percpu global variables in bpf programs. Key changes: * Introduces the ".percpu" section in libbpf. * Rounds the value size of the ".percpu" map definition and of its BTF DATASEC up to a multiple of 8 bytes. * Creates internal maps for percpu data. * Initializes and populates these maps accordingly. This enhancement improves performance for workloads that benefit from percpu storage. Additionally, this patch adds the bpf_map__is_internal_percpu() API to check whether a map is an internal map used for global percpu variables. Signed-off-by: Leon Hwang <leon.hwang@xxxxxxxxx> --- tools/lib/bpf/libbpf.c | 101 +++++++++++++++++++++++++++++++-------- tools/lib/bpf/libbpf.h | 9 ++++ tools/lib/bpf/libbpf.map | 1 + 3 files changed, 90 insertions(+), 21 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 194809da51725..736a902a667e9 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -516,6 +516,7 @@ struct bpf_struct_ops { }; #define DATA_SEC ".data" +#define PERCPU_SEC ".percpu" #define BSS_SEC ".bss" #define RODATA_SEC ".rodata" #define KCONFIG_SEC ".kconfig" @@ -530,6 +531,7 @@ enum libbpf_map_type { LIBBPF_MAP_BSS, LIBBPF_MAP_RODATA, LIBBPF_MAP_KCONFIG, + LIBBPF_MAP_PERCPU, }; struct bpf_map_def { @@ -640,6 +642,7 @@ enum sec_type { SEC_DATA, SEC_RODATA, SEC_ST_OPS, + SEC_PERCPU, }; struct elf_sec_desc { @@ -1903,7 +1906,7 @@ static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map) struct btf_var_secinfo *vsi; int i, n; - if (!map->btf_value_type_id) + if (!map->btf_value_type_id || map->libbpf_type == LIBBPF_MAP_PERCPU) return false; t = btf__type_by_id(obj->btf, map->btf_value_type_id); @@ -1927,6 +1930,7 @@ static int bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, const char *real_name, int sec_idx, void *data, size_t data_sz) { + bool is_percpu = type == LIBBPF_MAP_PERCPU; 
struct bpf_map_def *def; struct bpf_map *map; size_t mmap_sz; @@ -1948,9 +1952,9 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, } def = &map->def; - def->type = BPF_MAP_TYPE_ARRAY; + def->type = is_percpu ? BPF_MAP_TYPE_PERCPU_ARRAY : BPF_MAP_TYPE_ARRAY; def->key_size = sizeof(int); - def->value_size = data_sz; + def->value_size = is_percpu ? roundup(data_sz, 8) : data_sz; def->max_entries = 1; def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG ? BPF_F_RDONLY_PROG : 0; @@ -1961,10 +1965,11 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, if (map_is_mmapable(obj, map)) def->map_flags |= BPF_F_MMAPABLE; - pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", - map->name, map->sec_idx, map->sec_offset, def->map_flags); + pr_debug("map '%s' (global %sdata): at sec_idx %d, offset %zu, flags %x.\n", + map->name, is_percpu ? "percpu " : "", map->sec_idx, + map->sec_offset, def->map_flags); - mmap_sz = bpf_map_mmap_sz(map); + mmap_sz = is_percpu ? def->value_size : bpf_map_mmap_sz(map); map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); if (map->mmaped == MAP_FAILED) { @@ -2015,6 +2020,13 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj) sec_desc->data->d_buf, sec_desc->data->d_size); break; + case SEC_PERCPU: + sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); + err = bpf_object__init_internal_map(obj, LIBBPF_MAP_PERCPU, + sec_name, sec_idx, + sec_desc->data->d_buf, + sec_desc->data->d_size); + break; case SEC_BSS: sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS, @@ -3364,6 +3376,10 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, fixup_offsets = true; } + /* .percpu DATASEC must have __aligned(8) size. 
*/ + if (strcmp(sec_name, PERCPU_SEC) == 0 || str_has_pfx(sec_name, PERCPU_SEC)) + t->size = roundup(t->size, 8); + for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) { const struct btf_type *t_var; struct btf_var *var; @@ -3934,6 +3950,11 @@ static int bpf_object__elf_collect(struct bpf_object *obj) sec_desc->sec_type = SEC_RODATA; sec_desc->shdr = sh; sec_desc->data = data; + } else if (strcmp(name, PERCPU_SEC) == 0 || + str_has_pfx(name, PERCPU_SEC)) { + sec_desc->sec_type = SEC_PERCPU; + sec_desc->shdr = sh; + sec_desc->data = data; } else if (strcmp(name, STRUCT_OPS_SEC) == 0 || strcmp(name, STRUCT_OPS_LINK_SEC) == 0 || strcmp(name, "?" STRUCT_OPS_SEC) == 0 || @@ -4453,6 +4474,7 @@ static bool bpf_object__shndx_is_data(const struct bpf_object *obj, case SEC_BSS: case SEC_DATA: case SEC_RODATA: + case SEC_PERCPU: return true; default: return false; @@ -4478,6 +4500,8 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) return LIBBPF_MAP_DATA; case SEC_RODATA: return LIBBPF_MAP_RODATA; + case SEC_PERCPU: + return LIBBPF_MAP_PERCPU; default: return LIBBPF_MAP_UNSPEC; } @@ -4795,7 +4819,7 @@ static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map) /* * LLVM annotates global data differently in BTF, that is, - * only as '.data', '.bss' or '.rodata'. + * only as '.data', '.bss', '.percpu' or '.rodata'. 
*/ if (!bpf_map__is_internal(map)) return -ENOENT; @@ -5125,23 +5149,47 @@ static int bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) { enum libbpf_map_type map_type = map->libbpf_type; - int err, zero = 0; - size_t mmap_sz; + bool is_percpu = map_type == LIBBPF_MAP_PERCPU; + int err = 0, zero = 0, num_cpus, i; + size_t data_sz, elem_sz, mmap_sz; + void *data = NULL; + + data_sz = map->def.value_size; + if (is_percpu) { + num_cpus = libbpf_num_possible_cpus(); + if (num_cpus < 0) { + err = num_cpus; + return err; + } + + data_sz = data_sz * num_cpus; + data = malloc(data_sz); + if (!data) { + err = -ENOMEM; + return err; + } + + elem_sz = map->def.value_size; + for (i = 0; i < num_cpus; i++) + memcpy(data + i * elem_sz, map->mmaped, elem_sz); + } else { + data = map->mmaped; + } if (obj->gen_loader) { bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps, - map->mmaped, map->def.value_size); + data, data_sz); if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) bpf_gen__map_freeze(obj->gen_loader, map - obj->maps); - return 0; + goto free_data; } - err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0); + err = bpf_map_update_elem(map->fd, &zero, data, 0); if (err) { err = -errno; pr_warn("map '%s': failed to set initial contents: %s\n", bpf_map__name(map), errstr(err)); - return err; + goto free_data; } /* Freeze .rodata and .kconfig map as read-only from syscall side. 
*/ @@ -5151,7 +5199,7 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) err = -errno; pr_warn("map '%s': failed to freeze as read-only: %s\n", bpf_map__name(map), errstr(err)); - return err; + goto free_data; } } @@ -5178,7 +5226,7 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) err = -errno; pr_warn("map '%s': failed to re-mmap() contents: %s\n", bpf_map__name(map), errstr(err)); - return err; + goto free_data; } map->mmaped = mmaped; } else if (map->mmaped) { @@ -5186,7 +5234,10 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) map->mmaped = NULL; } - return 0; +free_data: + if (is_percpu) + free(data); + return err; } static void bpf_map__destroy(struct bpf_map *map); @@ -10132,14 +10183,17 @@ int bpf_map__fd(const struct bpf_map *map) static bool map_uses_real_name(const struct bpf_map *map) { - /* Since libbpf started to support custom .data.* and .rodata.* maps, - * their user-visible name differs from kernel-visible name. Users see - * such map's corresponding ELF section name as a map name. - * This check distinguishes .data/.rodata from .data.* and .rodata.* - * maps to know which name has to be returned to the user. + /* Since libbpf started to support custom .data.*, .percpu.* and + * .rodata.* maps, their user-visible name differs from kernel-visible + * name. Users see such map's corresponding ELF section name as a map + * name. This check distinguishes .data/.percpu/.rodata from .data.*, + * .percpu.* and .rodata.* maps to know which name has to be returned to + * the user. 
*/ if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0) return true; + if (map->libbpf_type == LIBBPF_MAP_PERCPU && strcmp(map->real_name, PERCPU_SEC) != 0) + return true; if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0) return true; return false; @@ -10386,6 +10440,11 @@ bool bpf_map__is_internal(const struct bpf_map *map) return map->libbpf_type != LIBBPF_MAP_UNSPEC; } +bool bpf_map__is_internal_percpu(const struct bpf_map *map) +{ + return map->libbpf_type == LIBBPF_MAP_PERCPU; +} + __u32 bpf_map__ifindex(const struct bpf_map *map) { return map->map_ifindex; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 3020ee45303a0..1d8ca33d370d1 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -1072,6 +1072,15 @@ LIBBPF_API void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize */ LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map); +/** + * @brief **bpf_map__is_internal_percpu()** tells the caller whether or not + * the passed map is an internal map used for global percpu variables. + * @param map the bpf_map + * @return true, if the map is an internal map used for global percpu + * variables; false, otherwise + */ +LIBBPF_API bool bpf_map__is_internal_percpu(const struct bpf_map *map); + /** * @brief **bpf_map__set_pin_path()** sets the path attribute that tells where the * BPF map should be pinned. This does not actually create the 'pin'. diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index b5a838de6f47c..09cdbd6e32218 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -438,4 +438,5 @@ LIBBPF_1.6.0 { bpf_linker__new_fd; btf__add_decl_attr; btf__add_type_attr; + bpf_map__is_internal_percpu; } LIBBPF_1.5.0; -- 2.47.1