On Fri, Aug 16, 2024 at 2:39 AM Sami Tolvanen <samitolvanen@xxxxxxxxxx> wrote: > > Basic types in DWARF repeat frequently and traversing the DIEs using > libdw is relatively slow. Add a simple hashtable based cache for the > processed DIEs. > > Signed-off-by: Sami Tolvanen <samitolvanen@xxxxxxxxxx> > --- > scripts/gendwarfksyms/Makefile | 1 + > scripts/gendwarfksyms/die.c | 170 +++++++++++++++++++++++ > scripts/gendwarfksyms/dwarf.c | 192 ++++++++++++++++++++------ > scripts/gendwarfksyms/gendwarfksyms.c | 6 + > scripts/gendwarfksyms/gendwarfksyms.h | 58 +++++++- > 5 files changed, 382 insertions(+), 45 deletions(-) > create mode 100644 scripts/gendwarfksyms/die.c > > diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile > index 623f8fc975ea..fcbac52ca00a 100644 > --- a/scripts/gendwarfksyms/Makefile > +++ b/scripts/gendwarfksyms/Makefile > @@ -1,6 +1,7 @@ > hostprogs-always-y += gendwarfksyms > > gendwarfksyms-objs += gendwarfksyms.o > +gendwarfksyms-objs += die.o > gendwarfksyms-objs += dwarf.o > gendwarfksyms-objs += symbols.o > > diff --git a/scripts/gendwarfksyms/die.c b/scripts/gendwarfksyms/die.c > new file mode 100644 > index 000000000000..ad6ba435d9dd > --- /dev/null > +++ b/scripts/gendwarfksyms/die.c > @@ -0,0 +1,170 @@ > +// SPDX-License-Identifier: GPL-2.0-or-later > +/* > + * Copyright (C) 2024 Google LLC > + */ > + > +#include <string.h> > +#include "gendwarfksyms.h" > + > +#define DIE_HASH_BITS 20 > + > +/* uintptr_t die->addr -> struct die * */ > +static DEFINE_HASHTABLE(die_map, DIE_HASH_BITS); > + > +static unsigned int map_hits; > +static unsigned int map_misses; > + > +static int create_die(Dwarf_Die *die, struct die **res) > +{ > + struct die *cd; > + > + cd = malloc(sizeof(struct die)); > + if (!cd) { > + error("malloc failed"); > + return -1; > + } > + > + cd->state = INCOMPLETE; > + cd->mapped = false; > + cd->fqn = NULL; > + cd->tag = -1; > + cd->addr = (uintptr_t)die->addr; > + cd->list = NULL; > + > + hash_add(die_map, &cd->hash, addr_hash(cd->addr)); > + *res = cd; > + return 0; > +} > + > +int __die_map_get(uintptr_t addr, enum die_state state, struct die **res) > +{ > + struct die *cd; > + > + hash_for_each_possible(die_map, cd, hash, addr_hash(addr)) { > + if (cd->addr == addr && cd->state == state) { > + *res = cd; > + return 0; > + } > + } > + > + return -1; > +} > + > +int die_map_get(Dwarf_Die *die, enum die_state state, struct die **res) > +{ > + if (__die_map_get((uintptr_t)die->addr, state, res) == 0) { > + map_hits++; > + return 0; > + } > + > + map_misses++; > + return check(create_die(die, res)); > +} > + > +static void reset_die(struct die *cd) > +{ > + struct die_fragment *tmp; > + struct die_fragment *df = cd->list; > + > + while (df) { > + if (df->type == STRING) > + free(df->data.str); > + > + tmp = df->next; > + free(df); > + df = tmp; > + } > + > + cd->state = INCOMPLETE; > + cd->mapped = false; > + if (cd->fqn) > + free(cd->fqn); > + cd->fqn = NULL; > + cd->tag = -1; > + cd->addr = 0; > + cd->list = NULL; > +} > + > +void die_map_free(void) > +{ > + struct hlist_node *tmp; > + unsigned int stats[LAST + 1]; > + struct die *cd; > + int i; > + > + memset(stats, 0, sizeof(stats)); > + > + hash_for_each_safe(die_map, i, tmp, cd, hash) { > + stats[cd->state]++; > + reset_die(cd); > + free(cd); > + } > + hash_init(die_map); > + > + if ((map_hits + map_misses > 0)) > + debug("hits %u, misses %u (hit rate %.02f%%)", map_hits, > + map_misses, > + (100.0f * map_hits) / (map_hits + map_misses)); > + > + for (i = 0; i <= LAST; i++) > + debug("%s: %u entries", die_state_name(i), stats[i]); > +} > + > +static int append_item(struct die *cd, struct die_fragment **res) > +{ > + struct die_fragment *prev; > + struct die_fragment *df; > + > + df = malloc(sizeof(struct die_fragment)); > + if (!df) { > + error("malloc failed"); > + return -1; > + } > + > + df->type = EMPTY; > + df->next = NULL; > + > + prev = cd->list; > + while (prev && prev->next) > + prev = prev->next; So, this entirely traverses the singly-linked list every time a new item is appended to the tail. In my analysis, this while loop iterates for thousands of times in total for emitting each export symbol. Why isn't this list_add_tail()? -- Best Regards Masahiro Yamada