Hi Arnaldo, On Fri, Mar 22, 2024 at 10:56:53AM -0300, Arnaldo Carvalho de Melo wrote: > On Wed, Mar 20, 2024 at 12:54:11PM +0000, Alan Maguire wrote: > > On 15/03/2024 19:48, Daniel Xu wrote: > > > This commit teaches pahole to parse symbols in .BTF_ids section in > > > vmlinux and discover exported kfuncs. Pahole then takes the list of > > > kfuncs and injects a BTF_KIND_DECL_TAG for each kfunc. > > > > > > Example of encoding: > > > > > > $ bpftool btf dump file .tmp_vmlinux.btf | rg "DECL_TAG 'bpf_kfunc'" | wc -l > > > 121 > > > > > > $ bpftool btf dump file .tmp_vmlinux.btf | rg 56337 > > > [56337] FUNC 'bpf_ct_change_timeout' type_id=56336 linkage=static > > > [127861] DECL_TAG 'bpf_kfunc' type_id=56337 component_idx=-1 > > > > > > This enables downstream users and tools to dynamically discover which > > > kfuncs are available on a system by parsing vmlinux or module BTF, both > > > available in /sys/kernel/btf. > > > > > > This feature is enabled with --btf_features=decl_tag,decl_tag_kfuncs. > > > > > > Signed-off-by: Daniel Xu <dxu@xxxxxxxxx> > > > > This is great work; a lot of steps needed to collect this info, but it's > > really valuable! > > > > Reviewed-by: Alan Maguire <alan.maguire@xxxxxxxxxx> > > Tested-by: Alan Maguire <alan.maguire@xxxxxxxxxx> > > > > BTW we need something like the attached patch to switch to using > > --btf_features for pahole 1.26 and later; will I send it officially or > > do you have something that does the same that you want to roll into your > > bpf-next series? Let me know what works from your side. Thanks! > > > > > --- > > > btf_encoder.c | 368 ++++++++++++++++++++++++++++++++++++++++++++++++++ > > > 1 file changed, 368 insertions(+) > > > > > > diff --git a/btf_encoder.c b/btf_encoder.c > > > index 850e36f..31848e2 100644 > > > --- a/btf_encoder.c > > > +++ b/btf_encoder.c > > > @@ -34,6 +34,21 @@ > > > #include <pthread.h> > > > > > > #define BTF_ENCODER_MAX_PROTO 512 > > > +#define BTF_IDS_SECTION ".BTF_ids" > > > +#define BTF_ID_FUNC_PFX "__BTF_ID__func__" > > > +#define BTF_ID_SET8_PFX "__BTF_ID__set8__" > > > +#define BTF_SET8_KFUNCS (1 << 0) > > > +#define BTF_KFUNC_TYPE_TAG "bpf_kfunc" > > > + > > > +/* Adapted from include/linux/btf_ids.h */ > > > +struct btf_id_set8 { > > > + uint32_t cnt; > > > + uint32_t flags; > > > + struct { > > > + uint32_t id; > > > + uint32_t flags; > > > + } pairs[]; > > > +}; > > > > > > /* state used to do later encoding of saved functions */ > > > struct btf_encoder_state { > > > @@ -75,6 +90,7 @@ struct btf_encoder { > > > verbose, > > > force, > > > gen_floats, > > > + skip_encoding_decl_tag, > > > tag_kfuncs, > > > is_rel; > > > uint32_t array_index_id; > > > @@ -94,6 +110,17 @@ struct btf_encoder { > > > } functions; > > > }; > > > > > > +struct btf_func { > > > + const char *name; > > > + int type_id; > > > +}; > > > + > > > +/* Half open interval representing range of addresses containing kfuncs */ > > > +struct btf_kfunc_set_range { > > > + size_t start; > > > + size_t end; > > Why size_t for addresses? No particular reason; seemed correct to handle 64bit and 32bit. I see uint64_t in other places. I can use that instead. > > > > +}; > > > + > > > static LIST_HEAD(encoders); > > > static pthread_mutex_t encoders__lock = PTHREAD_MUTEX_INITIALIZER; > > > > > > @@ -1363,8 +1390,339 @@ out: > > > return err; > > > } > > > > > > +/* Returns if `sym` points to a kfunc set */ > > > +static int is_sym_kfunc_set(GElf_Sym *sym, const char *name, Elf_Data *idlist, size_t idlist_addr) > > > +{ > > > + void *ptr = idlist->d_buf; > > > + struct btf_id_set8 *set; > > > + bool is_set8; > > > + int off; > > > + > > > + /* kfuncs are only found in BTF_SET8's */ > > > + is_set8 = !strncmp(name, BTF_ID_SET8_PFX, sizeof(BTF_ID_SET8_PFX) - 1); > > > + if (!is_set8) > > > + return false; > > > We have, as the kernel and tools/perf: > > dutil.h: * strstarts - does @str start with @prefix? > dutil.h:static inline bool strstarts(const char *str, const char *prefix) > > Can you please use it? Ack. > > > > + > > > + off = sym->st_value - idlist_addr; > > > + if (off >= idlist->d_size) { > > > + fprintf(stderr, "%s: symbol '%s' out of bounds\n", __func__, name); > > > + return false; > > > + } > > > + > > > + /* Check the set8 flags to see if it was marked as kfunc */ > > > + set = ptr + off; > > > + return set->flags & BTF_SET8_KFUNCS; > > > +} > > > + > > > +/* > > > + * Parse BTF_ID symbol and return the func name. > > > + * > > > + * Returns: > > > + * Caller-owned string containing func name if successful. > > > + * NULL if !func or on error. > > > + */ > > > +static char *get_func_name(const char *sym) > > > +{ > > > + char *func, *end; > > > + > > > + if (strncmp(sym, BTF_ID_FUNC_PFX, sizeof(BTF_ID_FUNC_PFX) - 1)) > > > + return NULL; > > strstarts() > > > > + /* Strip prefix */ > > > + func = strdup(sym + sizeof(BTF_ID_FUNC_PFX) - 1); > > > + if (strlen(func) < 2) { > > > + free(func); > > > + return NULL; > > > + } > > Can you add a comment as to why this exception? Does this limitation > matches one in the kernel? Where? This handles malformed input, such as "__BTF_ID__func___" (note the trailing triple underscore). I don't believe the kernel generates such names, but Eduard brought it up in [0] and I'd agree it'd be nice to not read out of bounds for all input. [0]: https://lore.kernel.org/bpf/itgzcn4cru32sk6sswbami2ojdynwyrfof3pnx5v4j3walx5oh@4qi3337fqnrz/ > > > > + > > > + /* Strip suffix */ > > > + end = strrchr(func, '_'); > > > + if (!end || *(end - 1) != '_') { > > > + free(func); > > > + return NULL; > > > + } > > > + *(end - 1) = '\0'; > > So the suffix has to have __? Can you add an example of such a func name > as a comment above this chopping? Sure. > > > > + return func; > > > +} > > > + > > > +static int btf_func_cmp(const void *_a, const void *_b) > > > +{ > > > + const struct btf_func *a = _a; > > > + const struct btf_func *b = _b; > > > + > > > + return strcmp(a->name, b->name); > > > +} > > > + > > > +/* > > > + * Collects all functions described in BTF. > > > + * Returns non-zero on error. > > > + */ > > > +static int btf_encoder__collect_btf_funcs(struct btf_encoder *encoder, struct gobuffer *funcs) > > > +{ > > > + struct btf *btf = encoder->btf; > > > + int nr_types, type_id; > > > + int err = -1; > > > + > > > + /* First collect all the func entries into an array */ > > > + nr_types = btf__type_cnt(btf); > > > + for (type_id = 1; type_id < nr_types; type_id++) { > > > + const struct btf_type *type; > > > + struct btf_func func = {}; > > > + const char *name; > > > + > > > + type = btf__type_by_id(btf, type_id); > > > + if (!type) { > > > + fprintf(stderr, "%s: malformed BTF, can't resolve type for ID %d\n", > > > + __func__, type_id); > > > + err = -EINVAL; > > > + goto out; > > > + } > > > + > > > + if (!btf_is_func(type)) > > > + continue; > > > + > > > + name = btf__name_by_offset(btf, type->name_off); > > > + if (!name) { > > > + fprintf(stderr, "%s: malformed BTF, can't resolve name for ID %d\n", > > > + __func__, type_id); > > > + err = -EINVAL; > > > + goto out; > > > + } > > > + > > > + func.name = name; > > > + func.type_id = type_id; > > > + err = gobuffer__add(funcs, &func, sizeof(func)); > > > + if (err < 0) > > > + goto out; > > > + } > > > + > > > + /* Now that we've collected funcs, sort them by name */ > > > + qsort((void *)gobuffer__entries(funcs), gobuffer__nr_entries(funcs), > > > + sizeof(struct btf_func), btf_func_cmp); > > Better to introcduce a 'gobuffer__sort(funcs, sizeof(struct btf_func), btf_func_cmp)' like we > have a gobuffer__compress(), please. Good idea [..] Thanks, Daniel