On Wed, Jan 31, 2024 at 11:17:23AM +0100, Jiri Olsa wrote: > On Sun, Jan 28, 2024 at 06:30:19PM -0700, Daniel Xu wrote: > > This commit teaches pahole to parse symbols in .BTF_ids section in > > vmlinux and discover exported kfuncs. Pahole then takes the list of > > kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc. > > > > Example of encoding: > > > > $ bpftool btf dump file .tmp_vmlinux.btf | rg "DECL_TAG 'bpf_kfunc'" | wc -l > > 121 > > > > $ bpftool btf dump file .tmp_vmlinux.btf | rg 56337 > > [56337] FUNC 'bpf_ct_change_timeout' type_id=56336 linkage=static > > [127861] DECL_TAG 'bpf_kfunc' type_id=56337 component_idx=-1 > > > > This enables downstream users and tools to dynamically discover which > > kfuncs are available on a system by parsing vmlinux or module BTF, both > > available in /sys/kernel/btf. > > > > Signed-off-by: Daniel Xu <dxu@xxxxxxxxx> > > > > --- > > Changes from v2: > > * More reliably detect kfunc membership in set8 by tracking set addr ranges > > * Rename some variables/functions to be more clear about kfunc vs func > > > > Changes from v1: > > * Fix resource leaks > > * Fix callee -> caller typo > > * Rename btf_decl_tag from kfunc -> bpf_kfunc > > * Only grab btf_id_set funcs tagged kfunc > > * Presort btf func list > > > > btf_encoder.c | 347 ++++++++++++++++++++++++++++++++++++++++++++++++++ > > 1 file changed, 347 insertions(+) > > > > diff --git a/btf_encoder.c b/btf_encoder.c > > index fd04008..4f742b1 100644 > > --- a/btf_encoder.c > > +++ b/btf_encoder.c > > @@ -34,6 +34,11 @@ > > #include <pthread.h> > > > > #define BTF_ENCODER_MAX_PROTO 512 > > +#define BTF_IDS_SECTION ".BTF_ids" > > +#define BTF_ID_FUNC_PFX "__BTF_ID__func__" > > +#define BTF_ID_SET8_PFX "__BTF_ID__set8__" > > +#define BTF_SET8_KFUNCS (1 << 0) > > +#define BTF_KFUNC_TYPE_TAG "bpf_kfunc" > > > > /* state used to do later encoding of saved functions */ > > struct btf_encoder_state { > > @@ -79,6 +84,7 @@ struct btf_encoder { > > gen_floats, > > is_rel; > > uint32_t array_index_id; > > + struct gobuffer btf_funcs; > > why does this need to be stored in encoder? I suppose it doesn't. It's used in multiple functions so I figured it'd be less verbose than passing it around. Also since it's fairly generic. I can move it to explicit arg passing if you want. > > > struct { > > struct var_info vars[MAX_PERCPU_VAR_CNT]; > > int var_cnt; > > @@ -94,6 +100,17 @@ struct btf_encoder { > > } functions; > > }; > > > > SNIP > > > +/* Returns if `sym` points to a kfunc set */ > > +static int is_sym_kfunc_set(GElf_Sym *sym, const char *name, Elf_Data *idlist, size_t idlist_addr) > > +{ > > + int *ptr = idlist->d_buf; > > + int idx, flags; > > + bool is_set8; > > + > > + /* kfuncs are only found in BTF_SET8's */ > > + is_set8 = !strncmp(name, BTF_ID_SET8_PFX, sizeof(BTF_ID_SET8_PFX) - 1); > > + if (!is_set8) > > + return false; > > + > > + idx = sym->st_value - idlist_addr; > > + if (idx >= idlist->d_size) { > > + fprintf(stderr, "%s: symbol '%s' out of bounds\n", __func__, name); > > + return false; > > + } > > + > > + /* Check the set8 flags to see if it was marked as kfunc */ > > + idx = idx / sizeof(int); > > + flags = ptr[idx + 1]; > > + return flags & BTF_SET8_KFUNCS; > > I wonder it'd be easier to read/follow if we bring struct btf_id_set8 > declaration in here and use it to get the flags field Yeah, it probably would be. Since it looks like resolve_btfids is going that direction, might as well do it here as well. > > > +} > > + > > +/* > > + * Parse BTF_ID symbol and return the func name. > > + * > > + * Returns: > > + * Caller-owned string containing func name if successful. > > + * NULL if !func or on error. > > + */ > > + > > SNIP > > > + /* Cannot resolve symbol or .BTF_ids sections. Nothing to do. */ > > + if (symbols_shndx == -1 || idlist_shndx == -1) { > > + err = 0; > > + goto out; > > + } > > + > > + if (!gelf_getshdr(symscn, &shdr)) { > > + elf_error("Failed to get ELF symbol table header"); > > + goto out; > > + } > > + nr_syms = shdr.sh_size / shdr.sh_entsize; > > + > > + err = btf_encoder__collect_btf_funcs(encoder); > > + if (err) { > > + fprintf(stderr, "%s: failed to collect BTF funcs\n", __func__); > > + goto out; > > + } > > + > > + /* First collect all kfunc set ranges. > > + * > > + * Note we choose not to sort these ranges and accept a linear > > + * search when doing lookups. Reasoning is that the number of > > + * sets is ~O(100) and not worth the additional code to optimize. > > + */ > > I think we could event add gobuffer interface/support to sort and search > quickly the data (we use it in other place), but that can be done as follow > up when it will become a problem as you pointed out > > > + for (i = 0; i < nr_syms; i++) { > > + struct btf_kfunc_set_range range = {}; > > + const char *name; > > + GElf_Sym sym; > > + > > + if (!gelf_getsym(symbols, i, &sym)) { > > + elf_error("Failed to get ELF symbol(%d)", i); > > + goto out; > > + } > > + > > + if (sym.st_shndx != idlist_shndx) > > + continue; > > + > > + name = elf_strptr(elf, strtabidx, sym.st_name); > > + if (!is_sym_kfunc_set(&sym, name, idlist, idlist_addr)) > > + continue; > > + > > + range.start = sym.st_value; > > + range.end = sym.st_value + sym.st_size; > > + gobuffer__add(&btf_kfunc_ranges, &range, sizeof(range)); > > + } > > + > > + /* Now inject BTF with kfunc decl tag for detected kfuncs */ > > + for (i = 0; i < nr_syms; i++) { > > + const struct btf_kfunc_set_range *ranges; > > + unsigned int ranges_cnt; > > + char *func, *name; > > + GElf_Sym sym; > > + bool found; > > + int err; > > + int j; > > + > > + if (!gelf_getsym(symbols, i, &sym)) { > > + elf_error("Failed to get ELF symbol(%d)", i); > > + goto out; > > + } > > + > > + if (sym.st_shndx != idlist_shndx) > > + continue; > > + > > + name = elf_strptr(elf, strtabidx, sym.st_name); > > + func = get_func_name(name); > > + if (!func) > > + continue; > > + > > + /* Check if function belongs to a kfunc set */ > > + ranges = gobuffer__entries(&btf_kfunc_ranges); > > + ranges_cnt = gobuffer__nr_entries(&btf_kfunc_ranges); > > + found = false; > > + for (j = 0; j < ranges_cnt; j++) { > > + size_t addr = sym.st_value; > > missing newline after declaration > > > + if (ranges[j].start <= addr && addr < ranges[j].end) { > > + found = true; > > + break; > > + } > > + } > > + if (!found) > > leaking func Ack, nice catch. [..] Thanks, Daniel