Basic types in DWARF repeat frequently and traversing the DIEs using libdw is relatively slow. Add a simple hashtable based cache for the processed DIEs. Signed-off-by: Sami Tolvanen <samitolvanen@xxxxxxxxxx> --- scripts/gendwarfksyms/Makefile | 1 + scripts/gendwarfksyms/die.c | 170 +++++++++++++++++++++++ scripts/gendwarfksyms/dwarf.c | 192 ++++++++++++++++++++------ scripts/gendwarfksyms/gendwarfksyms.c | 6 + scripts/gendwarfksyms/gendwarfksyms.h | 58 +++++++- 5 files changed, 382 insertions(+), 45 deletions(-) create mode 100644 scripts/gendwarfksyms/die.c diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile index 623f8fc975ea..fcbac52ca00a 100644 --- a/scripts/gendwarfksyms/Makefile +++ b/scripts/gendwarfksyms/Makefile @@ -1,6 +1,7 @@ hostprogs-always-y += gendwarfksyms gendwarfksyms-objs += gendwarfksyms.o +gendwarfksyms-objs += die.o gendwarfksyms-objs += dwarf.o gendwarfksyms-objs += symbols.o diff --git a/scripts/gendwarfksyms/die.c b/scripts/gendwarfksyms/die.c new file mode 100644 index 000000000000..ad6ba435d9dd --- /dev/null +++ b/scripts/gendwarfksyms/die.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2024 Google LLC + */ + +#include <string.h> +#include "gendwarfksyms.h" + +#define DIE_HASH_BITS 20 + +/* uintptr_t die->addr -> struct die * */ +static DEFINE_HASHTABLE(die_map, DIE_HASH_BITS); + +static unsigned int map_hits; +static unsigned int map_misses; + +static int create_die(Dwarf_Die *die, struct die **res) +{ + struct die *cd; + + cd = malloc(sizeof(struct die)); + if (!cd) { + error("malloc failed"); + return -1; + } + + cd->state = INCOMPLETE; + cd->mapped = false; + cd->fqn = NULL; + cd->tag = -1; + cd->addr = (uintptr_t)die->addr; + cd->list = NULL; + + hash_add(die_map, &cd->hash, addr_hash(cd->addr)); + *res = cd; + return 0; +} + +int __die_map_get(uintptr_t addr, enum die_state state, struct die **res) +{ + struct die *cd; + + hash_for_each_possible(die_map, cd, hash, addr_hash(addr)) { + if (cd->addr == addr && cd->state == state) { + *res = cd; + return 0; + } + } + + return -1; +} + +int die_map_get(Dwarf_Die *die, enum die_state state, struct die **res) +{ + if (__die_map_get((uintptr_t)die->addr, state, res) == 0) { + map_hits++; + return 0; + } + + map_misses++; + return check(create_die(die, res)); +} + +static void reset_die(struct die *cd) +{ + struct die_fragment *tmp; + struct die_fragment *df = cd->list; + + while (df) { + if (df->type == STRING) + free(df->data.str); + + tmp = df->next; + free(df); + df = tmp; + } + + cd->state = INCOMPLETE; + cd->mapped = false; + if (cd->fqn) + free(cd->fqn); + cd->fqn = NULL; + cd->tag = -1; + cd->addr = 0; + cd->list = NULL; +} + +void die_map_free(void) +{ + struct hlist_node *tmp; + unsigned int stats[LAST + 1]; + struct die *cd; + int i; + + memset(stats, 0, sizeof(stats)); + + hash_for_each_safe(die_map, i, tmp, cd, hash) { + stats[cd->state]++; + reset_die(cd); + free(cd); + } + hash_init(die_map); + + if ((map_hits + map_misses > 0)) + debug("hits %u, misses %u (hit rate %.02f%%)", map_hits, + map_misses, + (100.0f * map_hits) / (map_hits + map_misses)); + + for (i = 0; i <= LAST; i++) + debug("%s: %u entries", die_state_name(i), stats[i]); +} + +static int append_item(struct die *cd, struct die_fragment **res) +{ + struct die_fragment *prev; + struct die_fragment *df; + + df = malloc(sizeof(struct die_fragment)); + if (!df) { + error("malloc failed"); + return -1; + } + + df->type = EMPTY; + df->next = NULL; + + prev = cd->list; + while (prev && prev->next) + prev = prev->next; + + if (prev) + prev->next = df; + else + cd->list = df; + + *res = df; + return 0; +} + +int die_map_add_string(struct die *cd, const char *str) +{ + struct die_fragment *df; + + if (!cd) + return 0; + + check(append_item(cd, &df)); + + df->data.str = strdup(str); + if (!df->data.str) { + error("strdup failed"); + return -1; + } + + df->type = STRING; + return 0; +} + +int die_map_add_die(struct die *cd, struct die *child) +{ + struct die_fragment *df; + + if (!cd) + return 0; + + check(append_item(cd, &df)); + df->data.addr = child->addr; + df->type = DIE; + return 0; +} diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c index a37c9049d18e..82b966269acd 100644 --- a/scripts/gendwarfksyms/dwarf.c +++ b/scripts/gendwarfksyms/dwarf.c @@ -61,19 +61,20 @@ static bool is_export_symbol(struct state *state, Dwarf_Die *die) /* * Type string processing */ -static int process(struct state *state, const char *s) +static int process(struct state *state, struct die *cache, const char *s) { s = s ?: "<null>"; if (debug) fputs(s, stderr); - return 0; + return check(die_map_add_string(cache, s)); } #define MAX_FMT_BUFFER_SIZE 128 -static int process_fmt(struct state *state, const char *fmt, ...) +static int process_fmt(struct state *state, struct die *cache, const char *fmt, + ...) { char buf[MAX_FMT_BUFFER_SIZE]; va_list args; @@ -86,50 +87,103 @@ static int process_fmt(struct state *state, const char *fmt, ...) error("vsnprintf overflow: increase MAX_FMT_BUFFER_SIZE"); res = -1; } else { - res = check(process(state, buf)); + res = check(process(state, cache, buf)); } va_end(args); return res; } -/* Process a fully qualified name from DWARF scopes */ -static int process_fqn(struct state *state, Dwarf_Die *die) +#define MAX_FQN_SIZE 64 + +/* Get a fully qualified name from DWARF scopes */ +static int get_fqn(struct state *state, Dwarf_Die *die, char **fqn) { + const char *list[MAX_FQN_SIZE]; Dwarf_Die *scopes = NULL; - const char *name; + int count = 0; + int len = 0; int res; int i; + *fqn = NULL; + res = checkp(dwarf_getscopes_die(die, &scopes)); if (!res) { - name = get_name(die); - name = name ?: "<unnamed>"; - return check(process(state, name)); + list[count] = get_name(die); + + if (!list[count]) + return 0; + + len += strlen(list[count]); + count++; + + goto done; } - for (i = res - 1; i >= 0; i--) { + for (i = res - 1; i >= 0 && count < MAX_FQN_SIZE; i--) { if (dwarf_tag(&scopes[i]) == DW_TAG_compile_unit) continue; - name = get_name(&scopes[i]); - name = name ?: "<unnamed>"; - check(process(state, name)); - if (i > 0) - check(process(state, "::")); + /* + * If any of the DIEs in the scope is missing a name, consider + * the DIE to be unnamed. + */ + list[count] = get_name(&scopes[i]); + + if (!list[count]) { + free(scopes); + return 0; + } + + len += strlen(list[count]); + count++; + + if (i > 0) { + list[count++] = "::"; + len += 2; + } } + if (count == MAX_FQN_SIZE) + warn("increase MAX_FQN_SIZE: reached the maximum"); + free(scopes); + +done: + *fqn = malloc(len + 1); + if (!*fqn) { + error("malloc failed"); + return -1; + } + + **fqn = '\0'; + + for (i = 0; i < count; i++) + strcat(*fqn, list[i]); + return 0; } +static int process_fqn(struct state *state, struct die *cache, Dwarf_Die *die) +{ + const char *fqn; + + if (!cache->fqn) + check(get_fqn(state, die, &cache->fqn)); + + fqn = cache->fqn; + fqn = fqn ?: "<unnamed>"; + return check(process(state, cache, fqn)); +} + #define DEFINE_PROCESS_UDATA_ATTRIBUTE(attribute) \ - static int process_##attribute##_attr(struct state *state, \ - Dwarf_Die *die) \ + static int process_##attribute##_attr( \ + struct state *state, struct die *cache, Dwarf_Die *die) \ { \ Dwarf_Word value; \ if (get_udata_attr(die, DW_AT_##attribute, &value)) \ - check(process_fmt(state, \ + check(process_fmt(state, cache, \ " " #attribute "(%" PRIu64 ")", \ value)); \ return 0; \ @@ -144,8 +198,9 @@ bool match_all(Dwarf_Die *die) return true; } -int process_die_container(struct state *state, Dwarf_Die *die, - die_callback_t func, die_match_callback_t match) +int process_die_container(struct state *state, struct die *cache, + Dwarf_Die *die, die_callback_t func, + die_match_callback_t match) { Dwarf_Die current; int res; @@ -153,48 +208,99 @@ int process_die_container(struct state *state, Dwarf_Die *die, res = checkp(dwarf_child(die, ¤t)); while (!res) { if (match(¤t)) - check(func(state, ¤t)); + check(func(state, cache, ¤t)); res = checkp(dwarf_siblingof(¤t, ¤t)); } return 0; } -static int process_type(struct state *state, Dwarf_Die *die); +static int process_type(struct state *state, struct die *parent, + Dwarf_Die *die); -static int process_type_attr(struct state *state, Dwarf_Die *die) +static int process_type_attr(struct state *state, struct die *cache, + Dwarf_Die *die) { Dwarf_Die type; if (get_ref_die_attr(die, DW_AT_type, &type)) - return check(process_type(state, &type)); + return check(process_type(state, cache, &type)); /* Compilers can omit DW_AT_type -- print out 'void' to clarify */ - return check(process(state, "base_type void")); + return check(process(state, cache, "base_type void")); } -static int process_base_type(struct state *state, Dwarf_Die *die) +static int process_base_type(struct state *state, struct die *cache, + Dwarf_Die *die) { - check(process(state, "base_type ")); - check(process_fqn(state, die)); - check(process_byte_size_attr(state, die)); - check(process_encoding_attr(state, die)); - return check(process_alignment_attr(state, die)); + check(process(state, cache, "base_type ")); + check(process_fqn(state, cache, die)); + check(process_byte_size_attr(state, cache, die)); + check(process_encoding_attr(state, cache, die)); + return check(process_alignment_attr(state, cache, die)); } -static int process_type(struct state *state, Dwarf_Die *die) +static int process_cached(struct state *state, struct die *cache, + Dwarf_Die *die) { + struct die_fragment *df = cache->list; + Dwarf_Die child; + + while (df) { + switch (df->type) { + case STRING: + check(process(state, NULL, df->data.str)); + break; + case DIE: + if (!dwarf_die_addr_die(state->dbg, + (void *)df->data.addr, + &child)) { + error("dwarf_die_addr_die failed"); + return -1; + } + check(process_type(state, NULL, &child)); + break; + default: + error("empty die_fragment"); + return -1; + } + df = df->next; + } + + return 0; +} + +static int process_type(struct state *state, struct die *parent, Dwarf_Die *die) +{ + struct die *cache = NULL; int tag = dwarf_tag(die); + /* + * If we have the DIE already cached, use it instead of walking + * through DWARF. + */ + check(die_map_get(die, COMPLETE, &cache)); + + if (cache->state == COMPLETE) { + check(process_cached(state, cache, die)); + check(die_map_add_die(parent, cache)); + return 0; + } + switch (tag) { case DW_TAG_base_type: - check(process_base_type(state, die)); + check(process_base_type(state, cache, die)); break; default: debug("unimplemented type: %x", tag); break; } + /* Update cache state and append to the parent (if any) */ + cache->tag = tag; + cache->state = COMPLETE; + check(die_map_add_die(parent, cache)); + return 0; } @@ -203,14 +309,14 @@ static int process_type(struct state *state, Dwarf_Die *die) */ static int process_subprogram(struct state *state, Dwarf_Die *die) { - return check(process(state, "subprogram;\n")); + return check(process(state, NULL, "subprogram;\n")); } static int process_variable(struct state *state, Dwarf_Die *die) { - check(process(state, "variable ")); - check(process_type_attr(state, die)); - return check(process(state, ";\n")); + check(process(state, NULL, "variable ")); + check(process_type_attr(state, NULL, die)); + return check(process(state, NULL, ";\n")); } static int process_symbol_ptr(struct state *state, Dwarf_Die *die) @@ -235,7 +341,8 @@ static int process_symbol_ptr(struct state *state, Dwarf_Die *die) return check(process_variable(state, &ptr_type)); } -static int process_exported_symbols(struct state *state, Dwarf_Die *die) +static int process_exported_symbols(struct state *state, struct die *cache, + Dwarf_Die *die) { int tag = dwarf_tag(die); @@ -244,8 +351,9 @@ static int process_exported_symbols(struct state *state, Dwarf_Die *die) case DW_TAG_namespace: case DW_TAG_class_type: case DW_TAG_structure_type: - return check(process_die_container( - state, die, process_exported_symbols, match_all)); + return check(process_die_container(state, cache, die, + process_exported_symbols, + match_all)); /* Possible exported symbols */ case DW_TAG_subprogram: @@ -273,5 +381,5 @@ int process_module(Dwfl_Module *mod, Dwarf *dbg, Dwarf_Die *cudie) struct state state = { .mod = mod, .dbg = dbg }; return check(process_die_container( - &state, cudie, process_exported_symbols, match_all)); + &state, NULL, cudie, process_exported_symbols, match_all)); } diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c index e2f8ee5a4bf3..55a7fc902bf4 100644 --- a/scripts/gendwarfksyms/gendwarfksyms.c +++ b/scripts/gendwarfksyms/gendwarfksyms.c @@ -78,6 +78,10 @@ static int process_modules(Dwfl_Module *mod, void **userdata, const char *name, debug("%s", name); dbg = dwfl_module_getdwarf(mod, &dwbias); + /* + * Look for exported symbols in each CU, follow the DIE tree, and add + * the entries to die_map. + */ do { res = dwarf_get_units(dbg, cu, &cu, NULL, NULL, &cudie, NULL); if (res < 0) { @@ -90,6 +94,8 @@ static int process_modules(Dwfl_Module *mod, void **userdata, const char *name, check(process_module(mod, dbg, &cudie)); } while (cu); + die_map_free(); + return DWARF_CB_OK; } diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h index 8f6acd1b8f8f..b280acceb114 100644 --- a/scripts/gendwarfksyms/gendwarfksyms.h +++ b/scripts/gendwarfksyms/gendwarfksyms.h @@ -76,6 +76,11 @@ static inline u32 name_hash(const char *name) return jhash(name, strlen(name), 0); } +static inline u32 addr_hash(uintptr_t addr) +{ + return jhash(&addr, sizeof(addr), 0); +} + struct symbol { const char *name; struct symbol_addr addr; @@ -88,6 +93,52 @@ extern int symbol_read_exports(FILE *file); extern int symbol_read_symtab(int fd); extern struct symbol *symbol_get(const char *name); +/* + * die.c + */ + +enum die_state { INCOMPLETE, COMPLETE, LAST = COMPLETE }; +enum die_fragment_type { EMPTY, STRING, DIE }; + +struct die_fragment { + enum die_fragment_type type; + union { + char *str; + uintptr_t addr; + } data; + struct die_fragment *next; +}; + +#define CASE_CONST_TO_STR(name) \ + case name: \ + return #name; + +static inline const char *die_state_name(enum die_state state) +{ + switch (state) { + default: + CASE_CONST_TO_STR(INCOMPLETE) + CASE_CONST_TO_STR(COMPLETE) + } +} + +struct die { + enum die_state state; + bool mapped; + char *fqn; + int tag; + uintptr_t addr; + struct die_fragment *list; + struct hlist_node hash; +}; + +extern int __die_map_get(uintptr_t addr, enum die_state state, + struct die **res); +extern int die_map_get(Dwarf_Die *die, enum die_state state, struct die **res); +extern int die_map_add_string(struct die *pd, const char *str); +extern int die_map_add_die(struct die *pd, struct die *child); +extern void die_map_free(void); + /* * dwarf.c */ @@ -99,12 +150,13 @@ struct state { Dwarf_Die die; }; -typedef int (*die_callback_t)(struct state *state, Dwarf_Die *die); +typedef int (*die_callback_t)(struct state *state, struct die *cache, + Dwarf_Die *die); typedef bool (*die_match_callback_t)(Dwarf_Die *die); extern bool match_all(Dwarf_Die *die); -extern int process_die_container(struct state *state, Dwarf_Die *die, - die_callback_t func, +extern int process_die_container(struct state *state, struct die *cache, + Dwarf_Die *die, die_callback_t func, die_match_callback_t match); extern int process_module(Dwfl_Module *mod, Dwarf *dbg, Dwarf_Die *cudie); -- 2.46.0.184.g6999bdac58-goog