On Tue, Jul 11, 2023 at 2:03 AM Jiri Olsa <olsajiri@xxxxxxxxx> wrote: > > On Thu, Jul 06, 2023 at 04:24:48PM -0700, Andrii Nakryiko wrote: > > SNIP > > > > Suggested-by: Andrii Nakryiko <andrii@xxxxxxxxxx> > > > Signed-off-by: Jiri Olsa <jolsa@xxxxxxxxxx> > > > --- > > > tools/lib/bpf/elf.c | 178 +++++++++++++++++++++++++++++--------------- > > > 1 file changed, 117 insertions(+), 61 deletions(-) > > > > > > > A bunch of nits, but overall looks good. Please address nits, and add my ack > > > > Acked-by: Andrii Nakryiko <andrii@xxxxxxxxxx> > > > > > diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c > > > index 74e35071d22e..fcce4bd2478f 100644 > > > --- a/tools/lib/bpf/elf.c > > > +++ b/tools/lib/bpf/elf.c > > > @@ -59,6 +59,108 @@ static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn) > > > return NULL; > > > } > > > > > > +struct elf_sym { > > > + const char *name; > > > + GElf_Sym sym; > > > + GElf_Shdr sh; > > > +}; > > > + > > > > if we want to use elf_sym_iter outside of elf.c, this should be in > > libbpf_internal.h? > > yes eventually, but all the helper functions using elf_sym_iter that > I added later are in elf.c, so there's no need atm > > SNIP > > > > + > > > +static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter) > > > +{ > > > + struct elf_sym *ret = &iter->sym; > > > + GElf_Sym *sym = &ret->sym; > > > + const char *name = NULL; > > > + Elf_Scn *sym_scn; > > > + size_t idx; > > > + > > > + for (idx = iter->next_sym_idx; idx < iter->nr_syms; idx++) { > > > + if (!gelf_getsym(iter->syms, idx, sym)) > > > + continue; > > > + if (GELF_ST_TYPE(sym->st_info) != iter->st_type) > > > + continue; > > > + name = elf_strptr(iter->elf, iter->strtabidx, sym->st_name); > > > + if (!name) > > > + continue; > > > + > > > + /* Transform symbol's virtual address (absolute for > > > + * binaries and relative for shared libs) into file > > > + * offset, which is what kernel is expecting for > > > + * uprobe/uretprobe attachment. > > > + * See Documentation/trace/uprobetracer.rst for more > > > + * details. > > > + * This is done by looking up symbol's containing > > > + * section's header and using iter's virtual address > > > + * (sh_addr) and corresponding file offset (sh_offset) > > > + * to transform sym.st_value (virtual address) into > > > + * desired final file offset. > > > + */ > > > > this comment is misplaced? we don't do the translation here > > right, should be placed at the elf_sym_offset function > > > > > > + sym_scn = elf_getscn(iter->elf, sym->st_shndx); > > > + if (!sym_scn) > > > + continue; > > > + if (!gelf_getshdr(sym_scn, &ret->sh)) > > > + continue; > > > + > > > + iter->next_sym_idx = idx + 1; > > > + ret->name = name; > > > + return ret; > > > + } > > > + > > > + return NULL; > > > +} > > > + > > > +static unsigned long elf_sym_offset(struct elf_sym *sym) > > > +{ > > > + return sym->sym.st_value - sym->sh.sh_addr + sym->sh.sh_offset; > > > +} > > > + > > > /* Find offset of function name in the provided ELF object. "binary_path" is > > > * the path to the ELF binary represented by "elf", and only used for error > > > * reporting matters. "name" matches symbol name or name@@LIB for library > > > @@ -90,64 +192,36 @@ long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) > > > * reported as a warning/error. > > > */ > > > for (i = 0; i < ARRAY_SIZE(sh_types); i++) { > > > - size_t nr_syms, strtabidx, idx; > > > - Elf_Data *symbols = NULL; > > > - Elf_Scn *scn = NULL; > > > + struct elf_sym_iter iter; > > > + struct elf_sym *sym; > > > int last_bind = -1; > > > - const char *sname; > > > - GElf_Shdr sh; > > > + int curr_bind; > > > > OCD nit: > > > > $ rg 'curr(_|\b)' | wc -l > > 8 > > $ rg 'cur(_|\b)' | wc -l > > 148 > > > > and those 8 I consider an unfortunate accident ;) let's standardize on > > using "cur" consistently > > ok.. probably easier to make that change than treat ocd ;-) > thanks :) > > > > > > > > - scn = elf_find_next_scn_by_type(elf, sh_types[i], NULL); > > > - if (!scn) { > > > - pr_debug("elf: failed to find symbol table ELF sections in '%s'\n", > > > - binary_path); > > > - continue; > > > - } > > > - if (!gelf_getshdr(scn, &sh)) > > > - continue; > > > - strtabidx = sh.sh_link; > > > - symbols = elf_getdata(scn, 0); > > > - if (!symbols) { > > > - pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n", > > > - binary_path, elf_errmsg(-1)); > > > - ret = -LIBBPF_ERRNO__FORMAT; > > > + ret = elf_sym_iter_new(&iter, elf, binary_path, sh_types[i], STT_FUNC); > > > + if (ret) { > > > + if (ret == -ENOENT) > > > + continue; > > > goto out; > > > > another styling nit: let's avoid unnecessary nesting of ifs: > > > > if (ret == -ENOENT) > > continue; > > if (ret) > > goto out; > > > > simple and clean > > ok > > > > > > > > } > > > - nr_syms = symbols->d_size / sh.sh_entsize; > > > - > > > - for (idx = 0; idx < nr_syms; idx++) { > > > - int curr_bind; > > > - GElf_Sym sym; > > > - Elf_Scn *sym_scn; > > > - GElf_Shdr sym_sh; > > > - > > > - if (!gelf_getsym(symbols, idx, &sym)) > > > - continue; > > > - > > > - if (GELF_ST_TYPE(sym.st_info) != STT_FUNC) > > > - continue; > > > - > > > - sname = elf_strptr(elf, strtabidx, sym.st_name); > > > - if (!sname) > > > - continue; > > > - > > > - curr_bind = GELF_ST_BIND(sym.st_info); > > > > > > + while ((sym = elf_sym_iter_next(&iter))) { > > > /* User can specify func, func@@LIB or func@@LIB_VERSION. */ > > > - if (strncmp(sname, name, name_len) != 0) > > > + if (strncmp(sym->name, name, name_len) != 0) > > > continue; > > > /* ...but we don't want a search for "foo" to match 'foo2" also, so any > > > * additional characters in sname should be of the form "@@LIB". > > > */ > > > - if (!is_name_qualified && sname[name_len] != '\0' && sname[name_len] != '@') > > > + if (!is_name_qualified && sym->name[name_len] != '\0' && sym->name[name_len] != '@') > > > continue; > > > > > > - if (ret >= 0) { > > > + curr_bind = GELF_ST_BIND(sym->sym.st_info); > > > + > > > + if (ret > 0) { > > > > used to be >=, why the change? > > the original code initialized ret with -ENOENT and did not change > its value till this point, so the condition never triggered for > the first loop, but it did for the new code because we now use ret > earlier in: > > ret = elf_sym_iter_new(&iter, elf, binary_path, sh_types[i], STT_FUNC); > > also the check makes sense to me only if ret > 0 .. when we find > a duplicate value for symbol there is a subtle difference if we have some unresolved func with addr 0 or something. But you are right, it's probably not very sensical, we can keep it as > 0. > > jirka