On 10/8/24 20:38, Sami Tolvanen wrote: > Add support for producing genksyms-style symtypes files. Process > die_map to find the longest expansions for each type, and use symtypes > references in type definitions. The basic file format is similar to > genksyms, with two notable exceptions: > > 1. Type names with spaces (common with Rust) in references are > wrapped in single quotes. E.g.: > > s#'core::result::Result<u8, core::num::error::ParseIntError>' > > 2. The actual type definition is the simple parsed DWARF format we > output with --dump-dies, not the preprocessed C-style format > genksyms produces. > > Signed-off-by: Sami Tolvanen <samitolvanen@xxxxxxxxxx> > Acked-by: Neal Gompa <neal@xxxxxxxxx> > --- > scripts/gendwarfksyms/Makefile | 1 + > scripts/gendwarfksyms/die.c | 11 + > scripts/gendwarfksyms/dwarf.c | 1 + > scripts/gendwarfksyms/gendwarfksyms.c | 36 ++- > scripts/gendwarfksyms/gendwarfksyms.h | 19 ++ > scripts/gendwarfksyms/symbols.c | 4 +- > scripts/gendwarfksyms/types.c | 359 ++++++++++++++++++++++++++ > 7 files changed, 428 insertions(+), 3 deletions(-) > create mode 100644 scripts/gendwarfksyms/types.c > > diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile > index c06145d84df8..6540282dc746 100644 > --- a/scripts/gendwarfksyms/Makefile > +++ b/scripts/gendwarfksyms/Makefile > @@ -6,5 +6,6 @@ gendwarfksyms-objs += cache.o > gendwarfksyms-objs += die.o > gendwarfksyms-objs += dwarf.o > gendwarfksyms-objs += symbols.o > +gendwarfksyms-objs += types.o > > HOSTLDLIBS_gendwarfksyms := -ldw -lelf > diff --git a/scripts/gendwarfksyms/die.c b/scripts/gendwarfksyms/die.c > index 2829387fd815..df1ca3a032bb 100644 > --- a/scripts/gendwarfksyms/die.c > +++ b/scripts/gendwarfksyms/die.c > @@ -22,6 +22,7 @@ static inline unsigned int die_hash(uintptr_t addr, enum die_state state) > static void init_die(struct die *cd) > { > cd->state = DIE_INCOMPLETE; > + cd->mapped = false; > cd->fqn = NULL; > cd->tag = -1; > cd->addr = 0; > @@ -83,6 +84,16 @@ static void reset_die(struct die *cd) > init_die(cd); > } > > +void die_map_for_each(die_map_callback_t func, void *arg) > +{ > + struct hlist_node *tmp; > + struct die *cd; > + > + hash_for_each_safe(die_map, cd, tmp, hash) { > + func(cd, arg); > + } > +} > + > void die_map_free(void) > { > struct hlist_node *tmp; > diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c > index d67cac16f8fb..e1a9e9061b1d 100644 > --- a/scripts/gendwarfksyms/dwarf.c > +++ b/scripts/gendwarfksyms/dwarf.c > @@ -728,6 +728,7 @@ static void process_symbol(struct state *state, Dwarf_Die *die, > { > debug("%s", state->sym->name); > check(process_func(state, NULL, die)); > + state->sym->state = SYMBOL_MAPPED; > if (dump_dies) > fputs("\n", stderr); > } > diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c > index d40692a703bb..24c87523fc3a 100644 > --- a/scripts/gendwarfksyms/gendwarfksyms.c > +++ b/scripts/gendwarfksyms/gendwarfksyms.c > @@ -21,6 +21,11 @@ int debug; > int dump_dies; > /* Print debugging information about die_map changes */ > int dump_die_map; > +/* Print out type strings (i.e. type_map) */ > +int dump_types; > +/* Write a symtypes file */ > +int symtypes; > +static const char *symtypes_file; > > static void usage(void) > { > @@ -29,6 +34,8 @@ static void usage(void) > " -d, --debug Print debugging information\n" > " --dump-dies Dump DWARF DIE contents\n" > " --dump-die-map Print debugging information about die_map changes\n" > + " --dump-types Dump type strings\n" > + " -T, --symtypes file Write a symtypes file\n" > " -h, --help Print this message\n" > "\n", > stderr); > @@ -41,6 +48,7 @@ static int process_module(Dwfl_Module *mod, void **userdata, const char *name, > Dwarf_Die cudie; > Dwarf_CU *cu = NULL; > Dwarf *dbg; > + FILE *symfile = arg; > int res; > > debug("%s", name); > @@ -60,6 +68,10 @@ static int process_module(Dwfl_Module *mod, void **userdata, const char *name, > process_cu(&cudie); > } while (cu); > > + /* > + * Use die_map to expand type strings and write them to `symfile`. > + */ > + generate_symtypes(symfile); > die_map_free(); > > return DWARF_CB_OK; > @@ -72,22 +84,29 @@ static const Dwfl_Callbacks callbacks = { > > int main(int argc, char **argv) > { > + FILE *symfile = NULL; > unsigned int n; > int opt; > > struct option opts[] = { { "debug", 0, NULL, 'd' }, > { "dump-dies", 0, &dump_dies, 1 }, > { "dump-die-map", 0, &dump_die_map, 1 }, > + { "dump-types", 0, &dump_types, 1 }, > + { "symtypes", 1, NULL, 'T' }, > { "help", 0, NULL, 'h' }, > { 0, 0, NULL, 0 } }; > > - while ((opt = getopt_long(argc, argv, "dh", opts, NULL)) != EOF) { > + while ((opt = getopt_long(argc, argv, "dT:h", opts, NULL)) != EOF) { > switch (opt) { > case 0: > break; > case 'd': > debug = 1; > break; > + case 'T': > + symtypes = 1; > + symtypes_file = optarg; > + break; > case 'h': > usage(); > return 0; > @@ -107,6 +126,16 @@ int main(int argc, char **argv) > > symbol_read_exports(stdin); > > + if (symtypes_file) { > + symfile = fopen(symtypes_file, "w"); > + > + if (!symfile) { > + error("fopen failed for '%s': %s", symtypes_file, > + strerror(errno)); > + return -1; Nit: The 'return -1;' statement is not needed since error() doesn't return. > + } > + } > + > for (n = optind; n < argc; n++) { > Dwfl *dwfl; > int fd; > @@ -129,11 +158,14 @@ int main(int argc, char **argv) > > dwfl_report_end(dwfl, NULL, NULL); > > - if (dwfl_getmodules(dwfl, &process_module, NULL, 0)) > + if (dwfl_getmodules(dwfl, &process_module, symfile, 0)) > error("dwfl_getmodules failed for '%s'", argv[n]); > > dwfl_end(dwfl); > } > > + if (symfile) > + check(fclose(symfile)); > + > return 0; > } > diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h > index 126916e85ee0..e47b5e967520 100644 > --- a/scripts/gendwarfksyms/gendwarfksyms.h > +++ b/scripts/gendwarfksyms/gendwarfksyms.h > @@ -25,6 +25,8 @@ > extern int debug; > extern int dump_dies; > extern int dump_die_map; > +extern int dump_types; > +extern int symtypes; > > /* > * Output helpers > @@ -93,6 +95,11 @@ static inline unsigned int addr_hash(uintptr_t addr) > return hash_ptr((const void *)addr); > } > > +enum symbol_state { > + SYMBOL_UNPROCESSED, > + SYMBOL_MAPPED, > +}; > + > struct symbol_addr { > uint32_t section; > Elf64_Addr address; > @@ -103,6 +110,8 @@ struct symbol { > struct symbol_addr addr; > struct hlist_node addr_hash; > struct hlist_node name_hash; > + enum symbol_state state; > + uintptr_t die_addr; > }; > > typedef void (*symbol_callback_t)(struct symbol *, void *arg); > @@ -156,6 +165,7 @@ static inline const char *die_state_name(enum die_state state) > > struct die { > enum die_state state; > + bool mapped; > char *fqn; > int tag; > uintptr_t addr; > @@ -163,10 +173,13 @@ struct die { > struct hlist_node hash; > }; > > +typedef void (*die_map_callback_t)(struct die *, void *arg); > + > int __die_map_get(uintptr_t addr, enum die_state state, struct die **res); > struct die *die_map_get(Dwarf_Die *die, enum die_state state); > void die_map_add_string(struct die *pd, const char *str); > void die_map_add_linebreak(struct die *pd, int linebreak); > +void die_map_for_each(die_map_callback_t func, void *arg); > void die_map_add_die(struct die *pd, struct die *child); > void die_map_free(void); > > @@ -228,4 +241,10 @@ int process_die_container(struct state *state, struct die *cache, > > void process_cu(Dwarf_Die *cudie); > > +/* > + * types.c > + */ > + > +void generate_symtypes(FILE *file); > + > #endif /* __GENDWARFKSYMS_H */ > diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c > index 6cb99b8769ea..e414257333e5 100644 > --- a/scripts/gendwarfksyms/symbols.c > +++ b/scripts/gendwarfksyms/symbols.c > @@ -92,6 +92,7 @@ void symbol_read_exports(FILE *file) > sym = xcalloc(1, sizeof(struct symbol)); > sym->name = name; > sym->addr.section = SHN_UNDEF; > + sym->state = SYMBOL_UNPROCESSED; > > hash_add(symbol_names, &sym->name_hash, hash_str(sym->name)); > ++nsym; > @@ -107,7 +108,8 @@ static void get_symbol(struct symbol *sym, void *arg) > { > struct symbol **res = arg; > > - *res = sym; > + if (sym->state == SYMBOL_UNPROCESSED) > + *res = sym; > } What is the reason to check that the symbol is in the unprocessed state here? > > struct symbol *symbol_get(const char *name) > diff --git a/scripts/gendwarfksyms/types.c b/scripts/gendwarfksyms/types.c > new file mode 100644 > index 000000000000..c05811299eba > --- /dev/null > +++ b/scripts/gendwarfksyms/types.c > @@ -0,0 +1,359 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Copyright (C) 2024 Google LLC > + */ > + > +#include "gendwarfksyms.h" > + > +static struct expansion_cache expansion_cache; > + > +/* > + * A simple linked list of shared or owned strings to avoid copying strings > + * around when not necessary. > + */ > +struct type_list_entry { > + const char *str; > + void *owned; > + struct list_head list; > +}; > + > +static void type_list_free(struct list_head *list) > +{ > + struct type_list_entry *entry; > + struct type_list_entry *tmp; > + > + list_for_each_entry_safe(entry, tmp, list, list) { > + if (entry->owned) > + free(entry->owned); > + free(entry); > + } > + > + INIT_LIST_HEAD(list); > +} > + > +static int type_list_append(struct list_head *list, const char *s, void *owned) > +{ > + struct type_list_entry *entry; > + > + if (!s) > + return 0; > + > + entry = xmalloc(sizeof(struct type_list_entry)); > + entry->str = s; > + entry->owned = owned; > + list_add_tail(&entry->list, list); > + > + return strlen(entry->str); > +} > + > +static void type_list_write(struct list_head *list, FILE *file) > +{ > + struct type_list_entry *entry; > + > + list_for_each_entry(entry, list, list) { > + if (entry->str) > + checkp(fputs(entry->str, file)); > + } > +} > + > +/* > + * An expanded type string in symtypes format. > + */ > +struct type_expansion { > + char *name; > + size_t len; > + struct list_head expanded; > + struct hlist_node hash; > +}; > + > +static void type_expansion_init(struct type_expansion *type) > +{ > + type->name = NULL; > + type->len = 0; > + INIT_LIST_HEAD(&type->expanded); > +} > + > +static inline void type_expansion_free(struct type_expansion *type) > +{ > + free(type->name); > + type->name = NULL; > + type->len = 0; > + type_list_free(&type->expanded); > +} > + > +static void type_expansion_append(struct type_expansion *type, const char *s, > + void *owned) > +{ > + type->len += type_list_append(&type->expanded, s, owned); > +} > + > +/* > + * type_map -- the longest expansions for each type. > + * > + * const char *name -> struct type_expansion * > + */ > +#define TYPE_HASH_BITS 16 > +static HASHTABLE_DEFINE(type_map, 1 << TYPE_HASH_BITS); > + > +static int type_map_get(const char *name, struct type_expansion **res) > +{ > + struct type_expansion *e; > + > + hash_for_each_possible(type_map, e, hash, hash_str(name)) { > + if (!strcmp(name, e->name)) { > + *res = e; > + return 0; > + } > + } > + > + return -1; > +} > + > +static void type_map_add(const char *name, struct type_expansion *type) > +{ > + struct type_expansion *e; > + > + if (type_map_get(name, &e)) { > + e = xmalloc(sizeof(struct type_expansion)); > + type_expansion_init(e); > + e->name = xstrdup(name); > + > + hash_add(type_map, &e->hash, hash_str(e->name)); > + > + if (dump_types) > + debug("adding %s", e->name); > + } else { > + /* Use the longest available expansion */ > + if (type->len <= e->len) > + return; > + > + type_list_free(&e->expanded); > + > + if (dump_types) > + debug("replacing %s", e->name); > + } > + > + /* Take ownership of type->expanded */ > + list_replace_init(&type->expanded, &e->expanded); > + e->len = type->len; > + > + if (dump_types) { > + checkp(fputs(e->name, stderr)); > + checkp(fputs(" ", stderr)); > + type_list_write(&e->expanded, stderr); > + checkp(fputs("\n", stderr)); > + } > +} > + > +static void type_map_write(FILE *file) > +{ > + struct type_expansion *e; > + struct hlist_node *tmp; > + > + if (!file) > + return; > + > + hash_for_each_safe(type_map, e, tmp, hash) { > + checkp(fputs(e->name, file)); > + checkp(fputs(" ", file)); > + type_list_write(&e->expanded, file); > + checkp(fputs("\n", file)); > + } > +} > + > +static void type_map_free(void) > +{ > + struct type_expansion *e; > + struct hlist_node *tmp; > + > + hash_for_each_safe(type_map, e, tmp, hash) { > + type_expansion_free(e); > + free(e); > + } > + > + hash_init(type_map); > +} > + > +/* > + * Type reference format: <prefix>#<name>, where prefix: > + * s -> structure > + * u -> union > + * e -> enum > + * t -> typedef > + * > + * Names with spaces are additionally wrapped in single quotes. > + */ > +static char get_type_prefix(int tag) > +{ > + switch (tag) { > + case DW_TAG_class_type: > + case DW_TAG_structure_type: > + return 's'; > + case DW_TAG_union_type: > + return 'u'; > + case DW_TAG_enumeration_type: > + return 'e'; > + case DW_TAG_typedef_type: > + return 't'; > + default: > + return 0; > + } > +} > + > +static char *get_type_name(struct die *cache) > +{ > + const char *quote; > + char prefix; > + char *name; > + > + if (cache->state == DIE_INCOMPLETE) { > + warn("found incomplete cache entry: %p", cache); > + return NULL; > + } > + if (!cache->fqn || !*cache->fqn) > + return NULL; > + > + prefix = get_type_prefix(cache->tag); > + if (!prefix) > + return NULL; > + > + /* Wrap names with spaces in single quotes */ > + quote = strstr(cache->fqn, " ") ? "'" : ""; > + > + /* <prefix>#<type_name>\0 */ > + if (asprintf(&name, "%c#%s%s%s", prefix, quote, cache->fqn, quote) < 0) > + error("asprintf failed for '%s'", cache->fqn); > + > + return name; > +} > + > +static void __type_expand(struct die *cache, struct type_expansion *type, > + bool recursive); > + > +static void type_expand_child(struct die *cache, struct type_expansion *type, > + bool recursive) > +{ > + struct type_expansion child; > + char *name; > + > + name = get_type_name(cache); > + if (!name) { > + __type_expand(cache, type, recursive); > + return; > + } > + > + if (recursive && !__cache_was_expanded(&expansion_cache, cache->addr)) { > + __cache_mark_expanded(&expansion_cache, cache->addr); > + type_expansion_init(&child); > + __type_expand(cache, &child, true); > + type_map_add(name, &child); > + type_expansion_free(&child); > + } > + > + type_expansion_append(type, name, name); > +} > + > +static void __type_expand(struct die *cache, struct type_expansion *type, > + bool recursive) > +{ > + struct die_fragment *df; > + struct die *child; > + > + list_for_each_entry(df, &cache->fragments, list) { > + switch (df->type) { > + case FRAGMENT_STRING: > + type_expansion_append(type, df->data.str, NULL); > + break; > + case FRAGMENT_DIE: > + /* Use a complete die_map expansion if available */ > + if (__die_map_get(df->data.addr, DIE_COMPLETE, > + &child) && > + __die_map_get(df->data.addr, DIE_UNEXPANDED, > + &child)) > + error("unknown child: %" PRIxPTR, > + df->data.addr); > + > + type_expand_child(child, type, recursive); > + break; > + case FRAGMENT_LINEBREAK: > + /* > + * Keep whitespace in the symtypes format, but avoid > + * repeated spaces. > + */ > + if (list_is_last(&df->list, &cache->fragments) || > + list_next_entry(df, list)->type != > + FRAGMENT_LINEBREAK) > + type_expansion_append(type, " ", NULL); > + break; > + default: > + error("empty die_fragment in %p", cache); > + } > + } > +} > + > +static void type_expand(struct die *cache, struct type_expansion *type, > + bool recursive) > +{ > + type_expansion_init(type); > + __type_expand(cache, type, recursive); > + cache_clear_expanded(&expansion_cache); > +} > + > +static void expand_type(struct die *cache, void *arg) > +{ > + struct type_expansion type; > + char *name; > + > + if (cache->mapped) > + return; > + > + cache->mapped = true; > + > + /* > + * Skip unexpanded die_map entries if there's a complete > + * expansion available for this DIE. > + */ > + if (cache->state == DIE_UNEXPANDED && > + !__die_map_get(cache->addr, DIE_COMPLETE, &cache)) { > + if (cache->mapped) > + return; > + > + cache->mapped = true; > + } > + > + name = get_type_name(cache); > + if (!name) > + return; > + > + debug("%s", name); > + type_expand(cache, &type, true); > + type_map_add(name, &type); > + > + type_expansion_free(&type); > + free(name); > +} > + > +void generate_symtypes(FILE *file) > +{ > + hash_init(expansion_cache.cache); > + > + /* > + * die_map processing: > + * > + * 1. die_map contains all types referenced in exported symbol > + * signatures, but can contain duplicates just like the original > + * DWARF, and some references may not be fully expanded depending > + * on how far we processed the DIE tree for that specific symbol. > + * > + * For each die_map entry, find the longest available expansion, > + * and add it to type_map. > + */ > + die_map_for_each(expand_type, NULL); > + > + /* > + * 2. If a symtypes file is requested, write type_map contents to > + * the file. > + */ > + type_map_write(file); > + type_map_free(); > +} -- Thanks, Petr