On 8/15/24 19:39, Sami Tolvanen wrote: > Basic types in DWARF repeat frequently and traversing the DIEs using > libdw is relatively slow. Add a simple hashtable based cache for the > processed DIEs. > > Signed-off-by: Sami Tolvanen <samitolvanen@xxxxxxxxxx> > --- > scripts/gendwarfksyms/Makefile | 1 + > scripts/gendwarfksyms/die.c | 170 +++++++++++++++++++++++ > scripts/gendwarfksyms/dwarf.c | 192 ++++++++++++++++++++------ > scripts/gendwarfksyms/gendwarfksyms.c | 6 + > scripts/gendwarfksyms/gendwarfksyms.h | 58 +++++++- > 5 files changed, 382 insertions(+), 45 deletions(-) > create mode 100644 scripts/gendwarfksyms/die.c > > diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile > index 623f8fc975ea..fcbac52ca00a 100644 > --- a/scripts/gendwarfksyms/Makefile > +++ b/scripts/gendwarfksyms/Makefile > @@ -1,6 +1,7 @@ > hostprogs-always-y += gendwarfksyms > > gendwarfksyms-objs += gendwarfksyms.o > +gendwarfksyms-objs += die.o > gendwarfksyms-objs += dwarf.o > gendwarfksyms-objs += symbols.o > > diff --git a/scripts/gendwarfksyms/die.c b/scripts/gendwarfksyms/die.c > new file mode 100644 > index 000000000000..ad6ba435d9dd > --- /dev/null > +++ b/scripts/gendwarfksyms/die.c > @@ -0,0 +1,170 @@ > +// SPDX-License-Identifier: GPL-2.0-or-later > +/* > + * Copyright (C) 2024 Google LLC > + */ > + > +#include <string.h> > +#include "gendwarfksyms.h" > + > +#define DIE_HASH_BITS 20 > + > +/* uintptr_t die->addr -> struct die * */ > +static DEFINE_HASHTABLE(die_map, DIE_HASH_BITS); > + > +static unsigned int map_hits; > +static unsigned int map_misses; > + > +static int create_die(Dwarf_Die *die, struct die **res) > +{ > + struct die *cd; > + > + cd = malloc(sizeof(struct die)); > + if (!cd) { > + error("malloc failed"); > + return -1; > + } > + > + cd->state = INCOMPLETE; > + cd->mapped = false; > + cd->fqn = NULL; > + cd->tag = -1; > + cd->addr = (uintptr_t)die->addr; > + cd->list = NULL; > + > + hash_add(die_map, &cd->hash, addr_hash(cd->addr)); > + *res = cd; > + return 0; > +} > + > +int __die_map_get(uintptr_t addr, enum die_state state, struct die **res) > +{ > + struct die *cd; > + > + hash_for_each_possible(die_map, cd, hash, addr_hash(addr)) { > + if (cd->addr == addr && cd->state == state) { > + *res = cd; > + return 0; > + } > + } > + > + return -1; > +} > + > +int die_map_get(Dwarf_Die *die, enum die_state state, struct die **res) > +{ > + if (__die_map_get((uintptr_t)die->addr, state, res) == 0) { > + map_hits++; > + return 0; > + } > + > + map_misses++; > + return check(create_die(die, res)); > +} > + > +static void reset_die(struct die *cd) > +{ > + struct die_fragment *tmp; > + struct die_fragment *df = cd->list; > + > + while (df) { > + if (df->type == STRING) > + free(df->data.str); > + > + tmp = df->next; > + free(df); > + df = tmp; > + } > + > + cd->state = INCOMPLETE; > + cd->mapped = false; > + if (cd->fqn) > + free(cd->fqn); > + cd->fqn = NULL; > + cd->tag = -1; > + cd->addr = 0; > + cd->list = NULL; > +} > + > +void die_map_free(void) > +{ > + struct hlist_node *tmp; > + unsigned int stats[LAST + 1]; > + struct die *cd; > + int i; > + > + memset(stats, 0, sizeof(stats)); > + > + hash_for_each_safe(die_map, i, tmp, cd, hash) { > + stats[cd->state]++; > + reset_die(cd); > + free(cd); > + } > + hash_init(die_map); > + > + if ((map_hits + map_misses > 0)) Nit: Extra parentheses can be dropped. > + debug("hits %u, misses %u (hit rate %.02f%%)", map_hits, > + map_misses, > + (100.0f * map_hits) / (map_hits + map_misses)); > + > + for (i = 0; i <= LAST; i++) > + debug("%s: %u entries", die_state_name(i), stats[i]); > +} > + > +static int append_item(struct die *cd, struct die_fragment **res) > +{ > + struct die_fragment *prev; > + struct die_fragment *df; > + > + df = malloc(sizeof(struct die_fragment)); > + if (!df) { > + error("malloc failed"); > + return -1; > + } > + > + df->type = EMPTY; > + df->next = NULL; > + > + prev = cd->list; > + while (prev && prev->next) > + prev = prev->next; > + > + if (prev) > + prev->next = df; > + else > + cd->list = df; > + > + *res = df; > + return 0; > +} > + > +int die_map_add_string(struct die *cd, const char *str) > +{ > + struct die_fragment *df; > + > + if (!cd) > + return 0; > + > + check(append_item(cd, &df)); > + > + df->data.str = strdup(str); > + if (!df->data.str) { > + error("strdup failed"); > + return -1; > + } > + > + df->type = STRING; > + return 0; > +} > + > +int die_map_add_die(struct die *cd, struct die *child) > +{ > + struct die_fragment *df; > + > + if (!cd) > + return 0; > + > + check(append_item(cd, &df)); > + df->data.addr = child->addr; > + df->type = DIE; > + return 0; > +} > diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c > index a37c9049d18e..82b966269acd 100644 > --- a/scripts/gendwarfksyms/dwarf.c > +++ b/scripts/gendwarfksyms/dwarf.c > @@ -61,19 +61,20 @@ static bool is_export_symbol(struct state *state, Dwarf_Die *die) > /* > * Type string processing > */ > -static int process(struct state *state, const char *s) > +static int process(struct state *state, struct die *cache, const char *s) > { > s = s ?: "<null>"; > > if (debug) > fputs(s, stderr); > > - return 0; > + return check(die_map_add_string(cache, s)); > } > > #define MAX_FMT_BUFFER_SIZE 128 > > -static int process_fmt(struct state *state, const char *fmt, ...) > +static int process_fmt(struct state *state, struct die *cache, const char *fmt, > + ...) > { > char buf[MAX_FMT_BUFFER_SIZE]; > va_list args; > @@ -86,50 +87,103 @@ static int process_fmt(struct state *state, const char *fmt, ...) > error("vsnprintf overflow: increase MAX_FMT_BUFFER_SIZE"); > res = -1; > } else { > - res = check(process(state, buf)); > + res = check(process(state, cache, buf)); > } > > va_end(args); > return res; > } > > -/* Process a fully qualified name from DWARF scopes */ > -static int process_fqn(struct state *state, Dwarf_Die *die) > +#define MAX_FQN_SIZE 64 > + > +/* Get a fully qualified name from DWARF scopes */ > +static int get_fqn(struct state *state, Dwarf_Die *die, char **fqn) > { > + const char *list[MAX_FQN_SIZE]; > Dwarf_Die *scopes = NULL; > - const char *name; > + int count = 0; > + int len = 0; > int res; > int i; > > + *fqn = NULL; > + > res = checkp(dwarf_getscopes_die(die, &scopes)); > if (!res) { > - name = get_name(die); > - name = name ?: "<unnamed>"; > - return check(process(state, name)); > + list[count] = get_name(die); > + > + if (!list[count]) > + return 0; > + > + len += strlen(list[count]); > + count++; > + > + goto done; > } > > - for (i = res - 1; i >= 0; i--) { > + for (i = res - 1; i >= 0 && count < MAX_FQN_SIZE; i--) { > if (dwarf_tag(&scopes[i]) == DW_TAG_compile_unit) > continue; > > - name = get_name(&scopes[i]); > - name = name ?: "<unnamed>"; > - check(process(state, name)); > - if (i > 0) > - check(process(state, "::")); > + /* > + * If any of the DIEs in the scope is missing a name, consider > + * the DIE to be unnamed. > + */ > + list[count] = get_name(&scopes[i]); > + > + if (!list[count]) { > + free(scopes); > + return 0; > + } This slightly changes how scopes with no name are processed which is unrelated to the added caching. The previous logic used "<unnamed>" for individual unnamed scopes. The new code in such a case returns an empty FQN which is turned in process_fqn() into "<unnamed>". This is likely ok in practice for this particular tool. In general, I think "<unnamed>" should be returned when the initial DIE is missing a name and something like "<anonymous>::foo" when an outer scope has no name. More importantly, using "<unnamed>" when a type has no name looks to me overly verbose, in particular, when it comes to the symtypes output. For instance, the current output for a 'const char *' parameter is: formal_parameter pointer_type <unnamed> { const_type <unnamed> { base_type char byte_size(1) encoding(8) } } byte_size(8) .. while the following should be sufficient and easier to grasp: formal_parameter pointer_type { const_type { base_type char byte_size(1) encoding(8) } } byte_size(8) > + > + len += strlen(list[count]); > + count++; > + > + if (i > 0) { > + list[count++] = "::"; > + len += 2; > + } > } > > + if (count == MAX_FQN_SIZE) > + warn("increase MAX_FQN_SIZE: reached the maximum"); > + > free(scopes); > + > +done: > + *fqn = malloc(len + 1); > + if (!*fqn) { > + error("malloc failed"); > + return -1; > + } > + > + **fqn = '\0'; > + > + for (i = 0; i < count; i++) > + strcat(*fqn, list[i]); Small optimization: This loop could be written as follows to avoid repeatedly searching the end of fqn: char *p = *fqn; for (i = 0; i < count; i++) p = stpcpy(p, list[i]); > + > return 0; > } > > +static int process_fqn(struct state *state, struct die *cache, Dwarf_Die *die) > +{ > + const char *fqn; > + > + if (!cache->fqn) > + check(get_fqn(state, die, &cache->fqn)); > + > + fqn = cache->fqn; > + fqn = fqn ?: "<unnamed>"; As a small optimization and for consistency, I would recommended to also cache the "<unnamed>" name to avoid repeatedly calling get_fqn() for such DIEs. > + return check(process(state, cache, fqn)); > +} > + > #define DEFINE_PROCESS_UDATA_ATTRIBUTE(attribute) \ > - static int process_##attribute##_attr(struct state *state, \ > - Dwarf_Die *die) \ > + static int process_##attribute##_attr( \ > + struct state *state, struct die *cache, Dwarf_Die *die) \ > { \ > Dwarf_Word value; \ > if (get_udata_attr(die, DW_AT_##attribute, &value)) \ > - check(process_fmt(state, \ > + check(process_fmt(state, cache, \ > " " #attribute "(%" PRIu64 ")", \ > value)); \ > return 0; \ > @@ -144,8 +198,9 @@ bool match_all(Dwarf_Die *die) > return true; > } > > -int process_die_container(struct state *state, Dwarf_Die *die, > - die_callback_t func, die_match_callback_t match) > +int process_die_container(struct state *state, struct die *cache, > + Dwarf_Die *die, die_callback_t func, > + die_match_callback_t match) > { > Dwarf_Die current; > int res; > @@ -153,48 +208,99 @@ int process_die_container(struct state *state, Dwarf_Die *die, > res = checkp(dwarf_child(die, ¤t)); > while (!res) { > if (match(¤t)) > - check(func(state, ¤t)); > + check(func(state, cache, ¤t)); > res = checkp(dwarf_siblingof(¤t, ¤t)); > } > > return 0; > } > > -static int process_type(struct state *state, Dwarf_Die *die); > +static int process_type(struct state *state, struct die *parent, > + Dwarf_Die *die); > > -static int process_type_attr(struct state *state, Dwarf_Die *die) > +static int process_type_attr(struct state *state, struct die *cache, > + Dwarf_Die *die) > { > Dwarf_Die type; > > if (get_ref_die_attr(die, DW_AT_type, &type)) > - return check(process_type(state, &type)); > + return check(process_type(state, cache, &type)); > > /* Compilers can omit DW_AT_type -- print out 'void' to clarify */ > - return check(process(state, "base_type void")); > + return check(process(state, cache, "base_type void")); > } > > -static int process_base_type(struct state *state, Dwarf_Die *die) > +static int process_base_type(struct state *state, struct die *cache, > + Dwarf_Die *die) > { > - check(process(state, "base_type ")); > - check(process_fqn(state, die)); > - check(process_byte_size_attr(state, die)); > - check(process_encoding_attr(state, die)); > - return check(process_alignment_attr(state, die)); > + check(process(state, cache, "base_type ")); > + check(process_fqn(state, cache, die)); > + check(process_byte_size_attr(state, cache, die)); > + check(process_encoding_attr(state, cache, die)); > + return check(process_alignment_attr(state, cache, die)); > } > > -static int process_type(struct state *state, Dwarf_Die *die) > +static int process_cached(struct state *state, struct die *cache, > + Dwarf_Die *die) > { > + struct die_fragment *df = cache->list; > + Dwarf_Die child; > + > + while (df) { > + switch (df->type) { > + case STRING: > + check(process(state, NULL, df->data.str)); > + break; > + case DIE: > + if (!dwarf_die_addr_die(state->dbg, > + (void *)df->data.addr, > + &child)) { > + error("dwarf_die_addr_die failed"); > + return -1; > + } > + check(process_type(state, NULL, &child)); > + break; > + default: > + error("empty die_fragment"); > + return -1; > + } > + df = df->next; > + } > + > + return 0; > +} > + > +static int process_type(struct state *state, struct die *parent, Dwarf_Die *die) > +{ > + struct die *cache = NULL; > int tag = dwarf_tag(die); > > + /* > + * If we have the DIE already cached, use it instead of walking > + * through DWARF. > + */ > + check(die_map_get(die, COMPLETE, &cache)); > + > + if (cache->state == COMPLETE) { > + check(process_cached(state, cache, die)); > + check(die_map_add_die(parent, cache)); > + return 0; > + } > + > switch (tag) { > case DW_TAG_base_type: > - check(process_base_type(state, die)); > + check(process_base_type(state, cache, die)); > break; > default: > debug("unimplemented type: %x", tag); > break; > } > > + /* Update cache state and append to the parent (if any) */ > + cache->tag = tag; > + cache->state = COMPLETE; > + check(die_map_add_die(parent, cache)); > + > return 0; > } > > @@ -203,14 +309,14 @@ static int process_type(struct state *state, Dwarf_Die *die) > */ > static int process_subprogram(struct state *state, Dwarf_Die *die) > { > - return check(process(state, "subprogram;\n")); > + return check(process(state, NULL, "subprogram;\n")); > } > > static int process_variable(struct state *state, Dwarf_Die *die) > { > - check(process(state, "variable ")); > - check(process_type_attr(state, die)); > - return check(process(state, ";\n")); > + check(process(state, NULL, "variable ")); > + check(process_type_attr(state, NULL, die)); > + return check(process(state, NULL, ";\n")); > } > > static int process_symbol_ptr(struct state *state, Dwarf_Die *die) > @@ -235,7 +341,8 @@ static int process_symbol_ptr(struct state *state, Dwarf_Die *die) > return check(process_variable(state, &ptr_type)); > } > > -static int process_exported_symbols(struct state *state, Dwarf_Die *die) > +static int process_exported_symbols(struct state *state, struct die *cache, > + Dwarf_Die *die) > { > int tag = dwarf_tag(die); > > @@ -244,8 +351,9 @@ static int process_exported_symbols(struct state *state, Dwarf_Die *die) > case DW_TAG_namespace: > case DW_TAG_class_type: > case DW_TAG_structure_type: > - return check(process_die_container( > - state, die, process_exported_symbols, match_all)); > + return check(process_die_container(state, cache, die, > + process_exported_symbols, > + match_all)); > > /* Possible exported symbols */ > case DW_TAG_subprogram: > @@ -273,5 +381,5 @@ int process_module(Dwfl_Module *mod, Dwarf *dbg, Dwarf_Die *cudie) > struct state state = { .mod = mod, .dbg = dbg }; > > return check(process_die_container( > - &state, cudie, process_exported_symbols, match_all)); > + &state, NULL, cudie, process_exported_symbols, match_all)); > } > diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c > index e2f8ee5a4bf3..55a7fc902bf4 100644 > --- a/scripts/gendwarfksyms/gendwarfksyms.c > +++ b/scripts/gendwarfksyms/gendwarfksyms.c > @@ -78,6 +78,10 @@ static int process_modules(Dwfl_Module *mod, void **userdata, const char *name, > debug("%s", name); > dbg = dwfl_module_getdwarf(mod, &dwbias); > > + /* > + * Look for exported symbols in each CU, follow the DIE tree, and add > + * the entries to die_map. > + */ > do { > res = dwarf_get_units(dbg, cu, &cu, NULL, NULL, &cudie, NULL); > if (res < 0) { > @@ -90,6 +94,8 @@ static int process_modules(Dwfl_Module *mod, void **userdata, const char *name, > check(process_module(mod, dbg, &cudie)); > } while (cu); > > + die_map_free(); > + > return DWARF_CB_OK; > } > > diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h > index 8f6acd1b8f8f..b280acceb114 100644 > --- a/scripts/gendwarfksyms/gendwarfksyms.h > +++ b/scripts/gendwarfksyms/gendwarfksyms.h > @@ -76,6 +76,11 @@ static inline u32 name_hash(const char *name) > return jhash(name, strlen(name), 0); > } > > +static inline u32 addr_hash(uintptr_t addr) > +{ > + return jhash(&addr, sizeof(addr), 0); > +} > + > struct symbol { > const char *name; > struct symbol_addr addr; > @@ -88,6 +93,52 @@ extern int symbol_read_exports(FILE *file); > extern int symbol_read_symtab(int fd); > extern struct symbol *symbol_get(const char *name); > > +/* > + * die.c > + */ > + > +enum die_state { INCOMPLETE, COMPLETE, LAST = COMPLETE }; > +enum die_fragment_type { EMPTY, STRING, DIE }; Nit: I would suggest to prefix the enum values, for example, STATE_INCOMPLETE, ... and FRAGMENT_EMPTY, ... > + > +struct die_fragment { > + enum die_fragment_type type; > + union { > + char *str; > + uintptr_t addr; > + } data; > + struct die_fragment *next; > +}; > + > +#define CASE_CONST_TO_STR(name) \ > + case name: \ > + return #name; > + > +static inline const char *die_state_name(enum die_state state) > +{ > + switch (state) { > + default: > + CASE_CONST_TO_STR(INCOMPLETE) > + CASE_CONST_TO_STR(COMPLETE) > + } > +} > + > +struct die { > + enum die_state state; > + bool mapped; > + char *fqn; > + int tag; > + uintptr_t addr; > + struct die_fragment *list; > + struct hlist_node hash; > +}; > + > +extern int __die_map_get(uintptr_t addr, enum die_state state, > + struct die **res); > +extern int die_map_get(Dwarf_Die *die, enum die_state state, struct die **res); > +extern int die_map_add_string(struct die *pd, const char *str); > +extern int die_map_add_die(struct die *pd, struct die *child); > +extern void die_map_free(void); > + > /* > * dwarf.c > */ > @@ -99,12 +150,13 @@ struct state { > Dwarf_Die die; > }; > > -typedef int (*die_callback_t)(struct state *state, Dwarf_Die *die); > +typedef int (*die_callback_t)(struct state *state, struct die *cache, > + Dwarf_Die *die); > typedef bool (*die_match_callback_t)(Dwarf_Die *die); > extern bool match_all(Dwarf_Die *die); > > -extern int process_die_container(struct state *state, Dwarf_Die *die, > - die_callback_t func, > +extern int process_die_container(struct state *state, struct die *cache, > + Dwarf_Die *die, die_callback_t func, > die_match_callback_t match); > > extern int process_module(Dwfl_Module *mod, Dwarf *dbg, Dwarf_Die *cudie); -- Thanks, Petr