On 8/15/24 19:39, Sami Tolvanen wrote: > Add support for producing genksyms-style symtypes files. Process > die_map to find the longest expansions for each type, and use symtypes > references in type definitions. The basic file format is similar to > genksyms, with two notable exceptions: > > 1. Type names with spaces (common with Rust) in references are > wrapped in single quotes. E.g.: > > s#'core::result::Result<u8, core::num::error::ParseIntError>' > > 2. The actual type definition is the simple parsed DWARF format we > output with --dump-dies, not the preprocessed C-style format > genksyms produces. Thank you for adding this output to the tool. > > Signed-off-by: Sami Tolvanen <samitolvanen@xxxxxxxxxx> > --- > scripts/gendwarfksyms/Makefile | 1 + > scripts/gendwarfksyms/die.c | 13 + > scripts/gendwarfksyms/dwarf.c | 14 +- > scripts/gendwarfksyms/gendwarfksyms.c | 28 +- > scripts/gendwarfksyms/gendwarfksyms.h | 21 +- > scripts/gendwarfksyms/symbols.c | 11 +- > scripts/gendwarfksyms/types.c | 439 ++++++++++++++++++++++++++ > 7 files changed, 517 insertions(+), 10 deletions(-) > create mode 100644 scripts/gendwarfksyms/types.c > > diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile > index 681b42441840..4866a2fd0e46 100644 > --- a/scripts/gendwarfksyms/Makefile > +++ b/scripts/gendwarfksyms/Makefile > @@ -5,6 +5,7 @@ gendwarfksyms-objs += cache.o > gendwarfksyms-objs += die.o > gendwarfksyms-objs += dwarf.o > gendwarfksyms-objs += symbols.o > +gendwarfksyms-objs += types.o > > HOST_EXTRACFLAGS := -I $(srctree)/tools/include > HOSTLDLIBS_gendwarfksyms := -ldw -lelf > diff --git a/scripts/gendwarfksyms/die.c b/scripts/gendwarfksyms/die.c > index fdd52df88fdd..e40f04b70f7f 100644 > --- a/scripts/gendwarfksyms/die.c > +++ b/scripts/gendwarfksyms/die.c > @@ -85,6 +85,19 @@ static void reset_die(struct die *cd) > cd->list = NULL; > } > > +int die_map_for_each(die_map_callback_t func, void *arg) > +{ > + struct die *cd; > + struct hlist_node 
*tmp; > + int i; > + > + hash_for_each_safe(die_map, i, tmp, cd, hash) { > + check(func(cd, arg)); > + } > + > + return 0; > +} > + > void die_map_free(void) > { > struct hlist_node *tmp; > diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c > index 9bca21a71639..62241cc97a76 100644 > --- a/scripts/gendwarfksyms/dwarf.c > +++ b/scripts/gendwarfksyms/dwarf.c > @@ -60,11 +60,11 @@ static bool is_export_symbol(struct state *state, Dwarf_Die *die) > if (get_ref_die_attr(die, DW_AT_abstract_origin, &origin)) > source = &origin; > > - state->sym = symbol_get(get_name(die)); > + state->sym = symbol_get_unprocessed(get_name(die)); > > /* Look up using the origin name if there are no matches. */ > if (!state->sym && source != die) > - state->sym = symbol_get(get_name(source)); > + state->sym = symbol_get_unprocessed(get_name(source)); > > state->die = *source; > return !!state->sym; > @@ -384,6 +384,7 @@ static int process_subroutine_type(struct state *state, struct die *cache, > return check(__process_subroutine_type(state, cache, die, > "subroutine_type")); > } > + > static int process_variant_type(struct state *state, struct die *cache, > Dwarf_Die *die) > { > @@ -695,14 +696,16 @@ static int process_type(struct state *state, struct die *parent, Dwarf_Die *die) > static int process_subprogram(struct state *state, Dwarf_Die *die) > { > check(__process_subroutine_type(state, NULL, die, "subprogram")); > - return check(process(state, NULL, ";\n")); > + state->sym->state = MAPPED; > + return 0; > } > > static int process_variable(struct state *state, Dwarf_Die *die) > { > check(process(state, NULL, "variable ")); > check(process_type_attr(state, NULL, die)); > - return check(process(state, NULL, ";\n")); > + state->sym->state = MAPPED; > + return 0; > } > > static int process_symbol_ptr(struct state *state, Dwarf_Die *die) > @@ -757,6 +760,9 @@ static int process_exported_symbols(struct state *state, struct die *cache, > else > 
check(process_variable(state, &state->die)); > + > + if (dump_dies) > + fputs("\n", stderr); > + > cache_clear_expanded(&state->expansion_cache); > return 0; > default: > diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c > index 1349e592783b..6a219a54c342 100644 > --- a/scripts/gendwarfksyms/gendwarfksyms.c > +++ b/scripts/gendwarfksyms/gendwarfksyms.c > @@ -20,6 +20,11 @@ bool debug; > bool dump_dies; > /* Print out inline debugging information about die_map changes */ > bool dump_die_map; > +/* Print out type_map contents */ > +bool dump_types; > +/* Produce a symtypes file */ > +bool symtypes; > +static const char *symtypes_file; > > static const struct { > const char *arg; > @@ -29,6 +34,8 @@ static const struct { > { "--debug", &debug, NULL }, > { "--dump-dies", &dump_dies, NULL }, > { "--dump-die-map", &dump_die_map, NULL }, > + { "--dump-types", &dump_types, NULL }, > + { "--symtypes", &symtypes, &symtypes_file }, > }; > > static int usage(void) > @@ -79,6 +86,7 @@ static int process_modules(Dwfl_Module *mod, void **userdata, const char *name, > Dwarf_Die cudie; > Dwarf_CU *cu = NULL; > Dwarf *dbg; > + FILE *symfile = arg; > int res; > > debug("%s", name); > @@ -100,6 +108,10 @@ static int process_modules(Dwfl_Module *mod, void **userdata, const char *name, > check(process_module(mod, dbg, &cudie)); > } while (cu); > > + /* > + * Use die_map to expand type strings and write them to `symfile`. > + */ > + check(generate_symtypes(symfile)); > die_map_free(); > > return DWARF_CB_OK; > @@ -112,6 +124,7 @@ static const Dwfl_Callbacks callbacks = { > > int main(int argc, const char **argv) > { > + FILE *symfile = NULL; > unsigned int n; > > if (parse_options(argc, argv) < 0) > @@ -122,6 +135,16 @@ int main(int argc, const char **argv) > > check(symbol_read_exports(stdin)); > > + if (symtypes_file) { > + symfile = fopen(symtypes_file, "w+"); It is sufficient to open the file for writing only, i.e. with mode "w". 
> + > + if (!symfile) { > + error("fopen failed for '%s': %s", symtypes_file, > + strerror(errno)); > + return -1; > + } > + } > + > for (n = 0; n < object_count; n++) { > Dwfl *dwfl; > int fd; > @@ -151,7 +174,7 @@ int main(int argc, const char **argv) > > dwfl_report_end(dwfl, NULL, NULL); > > - if (dwfl_getmodules(dwfl, &process_modules, NULL, 0)) { > + if (dwfl_getmodules(dwfl, &process_modules, symfile, 0)) { > error("dwfl_getmodules failed for '%s'", > object_files[n]); > return -1; > @@ -161,5 +184,8 @@ int main(int argc, const char **argv) > close(fd); > } > > + if (symfile) > + fclose(symfile); > + > return 0; > } The fclose() call should be wrapped in check() to catch the case where flushing the stream fails. > diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h > index 7cd907e3d5e3..6edbd6478e0f 100644 > --- a/scripts/gendwarfksyms/gendwarfksyms.h > +++ b/scripts/gendwarfksyms/gendwarfksyms.h > @@ -22,6 +22,8 @@ > extern bool debug; > extern bool dump_dies; > extern bool dump_die_map; > +extern bool dump_types; > +extern bool symtypes; > > #define MAX_INPUT_FILES 128 > > @@ -89,6 +91,12 @@ extern bool dump_die_map; > #define SYMBOL_PTR_PREFIX "__gendwarfksyms_ptr_" > #define SYMBOL_PTR_PREFIX_LEN (sizeof(SYMBOL_PTR_PREFIX) - 1) > > +/* See dwarf.c:is_declaration */ > +#define SYMBOL_DECLONLY_PREFIX "__gendwarfksyms_declonly_" > +#define SYMBOL_DECLONLY_PREFIX_LEN (sizeof(SYMBOL_DECLONLY_PREFIX) - 1) Nit: These defines should go into patch 15/19 "gendwarfksyms: Add support for declaration-only data structures". 
> + > +enum symbol_state { UNPROCESSED, MAPPED }; > + > struct symbol_addr { > uint32_t section; > Elf64_Addr address; > @@ -109,12 +117,14 @@ struct symbol { > struct symbol_addr addr; > struct hlist_node addr_hash; > struct hlist_node name_hash; > + enum symbol_state state; > + uintptr_t die_addr; > }; > > extern bool is_symbol_ptr(const char *name); > extern int symbol_read_exports(FILE *file); > extern int symbol_read_symtab(int fd); > -extern struct symbol *symbol_get(const char *name); > +extern struct symbol *symbol_get_unprocessed(const char *name); > > /* > * die.c > @@ -157,12 +167,15 @@ struct die { > struct hlist_node hash; > }; > > +typedef int (*die_map_callback_t)(struct die *, void *arg); > + > extern int __die_map_get(uintptr_t addr, enum die_state state, > struct die **res); > extern int die_map_get(Dwarf_Die *die, enum die_state state, struct die **res); > extern int die_map_add_string(struct die *pd, const char *str); > extern int die_map_add_linebreak(struct die *pd, int linebreak); > extern int die_map_add_die(struct die *pd, struct die *child); > +extern int die_map_for_each(die_map_callback_t func, void *arg); > extern void die_map_free(void); > > /* > @@ -222,4 +235,10 @@ extern int process_die_container(struct state *state, struct die *cache, > > extern int process_module(Dwfl_Module *mod, Dwarf *dbg, Dwarf_Die *cudie); > > +/* > + * types.c > + */ > + > +extern int generate_symtypes(FILE *file); > + > #endif /* __GENDWARFKSYMS_H */ > diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c > index d6d016458ae1..8cc04e6295a7 100644 > --- a/scripts/gendwarfksyms/symbols.c > +++ b/scripts/gendwarfksyms/symbols.c > @@ -117,6 +117,7 @@ int symbol_read_exports(FILE *file) > > sym->name = name; > sym->addr.section = SHN_UNDEF; > + sym->state = UNPROCESSED; > name = NULL; > > hash_add(symbol_names, &sym->name_hash, name_hash(sym->name)); > @@ -132,19 +133,21 @@ int symbol_read_exports(FILE *file) > return 0; > } > > -static 
int get_symbol(struct symbol *sym, void *arg) > +static int get_unprocessed(struct symbol *sym, void *arg) > { > struct symbol **res = arg; > > - *res = sym; > + if (sym->state == UNPROCESSED) > + *res = sym; > + > return 0; > } > > -struct symbol *symbol_get(const char *name) > +struct symbol *symbol_get_unprocessed(const char *name) > { > struct symbol *sym = NULL; > > - for_each(name, false, get_symbol, &sym); > + for_each(name, false, get_unprocessed, &sym); > return sym; > } > > diff --git a/scripts/gendwarfksyms/types.c b/scripts/gendwarfksyms/types.c > new file mode 100644 > index 000000000000..7b9997d8322d > --- /dev/null > +++ b/scripts/gendwarfksyms/types.c > @@ -0,0 +1,439 @@ > +// SPDX-License-Identifier: GPL-2.0-or-later > +/* > + * Copyright (C) 2024 Google LLC > + */ > + > +#include "gendwarfksyms.h" > +#include "crc32.h" > + > +static struct expansion_cache expansion_cache; > + > +/* > + * A simple linked list of shared or owned strings to avoid copying strings > + * around when not necessary. 
> + */ > +struct type_list { > + const char *str; > + void *owned; > + struct type_list *next; > +}; > + > +static struct type_list *type_list_alloc(void) > +{ > + struct type_list *list; > + > + list = calloc(1, sizeof(struct type_list)); > + if (!list) > + error("calloc failed"); > + > + return list; > +} > + > +static void type_list_free(struct type_list *list) > +{ > + struct type_list *tmp; > + > + while (list) { > + if (list->owned) > + free(list->owned); > + > + tmp = list; > + list = list->next; > + free(tmp); > + } > +} > + > +static int type_list_append(struct type_list *list, const char *s, void *owned) > +{ > + if (!list || !s) > + return 0; > + > + while (list->next) > + list = list->next; > + > + if (list->str) { > + list->next = type_list_alloc(); > + > + if (!list->next) { > + error("type_list_alloc failed"); > + return -1; > + } > + > + list = list->next; > + } > + > + list->str = s; > + list->owned = owned; > + > + return strlen(list->str); > +} > + > +static int type_list_write(struct type_list *list, FILE *file) > +{ > + while (list) { > + if (list->str) > + checkp(fputs(list->str, file)); > + list = list->next; > + } > + > + return 0; > +} > + > +/* > + * An expanded type string in symtypes format. > + */ > +struct type_expansion { > + char *name; > + struct type_list *expanded; > + struct type_list *last; > + size_t len; > + struct hlist_node hash; > +}; I found the manipulation of type_expansion.expanded and type_expansion.last somewhat strange. The list starts already with one element in type_expansion_init(). This is apparently to make the last pointer valid. This element is however empty and gets only assigned on the first call to type_list_append(). Other elements are then added normally, always assigned. Perhaps consider using a regular list implementation, similarly to what was discussed under the patch 06/19 "gendwarfksyms: Add a cache for processed DIEs". 
> + > +static int type_expansion_init(struct type_expansion *type, bool alloc) > +{ > + memset(type, 0, sizeof(struct type_expansion)); > + if (alloc) { > + type->expanded = type_list_alloc(); > + if (!type->expanded) > + return -1; > + > + type->last = type->expanded; > + } > + return 0; > +} > + > +static inline void type_expansion_free(struct type_expansion *type) > +{ > + free(type->name); > + type_list_free(type->expanded); > + type_expansion_init(type, false); > +} > + > +static int type_expansion_append(struct type_expansion *type, const char *s, > + void *owned) > +{ > + type->len += checkp(type_list_append(type->last, s, owned)); > + > + if (type->last->next) > + type->last = type->last->next; > + > + return 0; > +} > + > +/* > + * type_map -- the longest expansions for each type. > + * > + * const char *name -> struct type_expansion * > + */ > +#define TYPE_HASH_BITS 16 > +static DEFINE_HASHTABLE(type_map, TYPE_HASH_BITS); > + > +static int type_map_get(const char *name, struct type_expansion **res) > +{ > + struct type_expansion *e; > + > + hash_for_each_possible(type_map, e, hash, name_hash(name)) { > + if (!strcmp(name, e->name)) { > + *res = e; > + return 0; > + } > + } > + > + return -1; > +} > + > +static int type_map_add(const char *name, struct type_expansion *type) > +{ > + struct type_expansion *e; > + > + if (type_map_get(name, &e)) { > + e = malloc(sizeof(struct type_expansion)); > + if (!e) { > + error("malloc failed"); > + return -1; > + } > + > + type_expansion_init(e, false); > + > + e->name = strdup(name); > + if (!e->name) { > + error("strdup failed"); > + return -1; > + } > + > + hash_add(type_map, &e->hash, name_hash(e->name)); > + > + if (dump_types) > + debug("adding %s", e->name); > + } else { > + /* Use the longest available expansion */ > + if (type->len <= e->len) > + return 0; > + > + type_list_free(e->expanded); > + > + if (dump_types) > + debug("replacing %s", e->name); > + } > + > + /* Take ownership of type->expanded */ > + 
e->expanded = type->expanded; > + e->last = type->last; > + e->len = type->len; > + type->expanded = NULL; > + type->last = NULL; > + type->len = 0; > + > + if (dump_types) { > + fputs(e->name, stderr); > + fputs(" ", stderr); > + type_list_write(e->expanded, stderr); > + fputs("\n", stderr); > + } > + > + return 0; > +} > + > +static int type_map_write(FILE *file) > +{ > + struct type_expansion *e; > + struct hlist_node *tmp; > + int i; > + > + if (!file) > + return 0; > + > + hash_for_each_safe(type_map, i, tmp, e, hash) { > + checkp(fputs(e->name, file)); > + checkp(fputs(" ", file)); > + type_list_write(e->expanded, file); > + checkp(fputs("\n", file)); > + } > + > + return 0; > +} > + > +static void type_map_free(void) > +{ > + struct type_expansion *e; > + struct hlist_node *tmp; > + int i; > + > + hash_for_each_safe(type_map, i, tmp, e, hash) { > + type_expansion_free(e); > + free(e); > + } > + > + hash_init(type_map); > +} > + > +/* > + * Type reference format: <prefix>#<name>, where prefix: > + * s -> structure > + * u -> union > + * e -> enum > + * t -> typedef > + * > + * Names with spaces are additionally wrapped in single quotes. 
> + */ > +static inline bool is_type_prefix(const char *s) > +{ > + return (s[0] == 's' || s[0] == 'u' || s[0] == 'e' || s[0] == 't') && > + s[1] == '#'; > +} > + > +static char get_type_prefix(int tag) > +{ > + switch (tag) { > + case DW_TAG_class_type: > + case DW_TAG_structure_type: > + return 's'; > + case DW_TAG_union_type: > + return 'u'; > + case DW_TAG_enumeration_type: > + return 'e'; > + case DW_TAG_typedef_type: > + return 't'; > + default: > + return 0; > + } > +} > + > +static char *get_type_name(struct die *cache) > +{ > + const char *format; > + char prefix; > + char *name; > + size_t len; > + > + if (cache->state == INCOMPLETE) { > + warn("found incomplete cache entry: %p", cache); > + return NULL; > + } > + if (!cache->fqn) > + return NULL; > + > + prefix = get_type_prefix(cache->tag); > + if (!prefix) > + return NULL; > + > + /* <prefix>#<type_name>\0 */ > + len = 2 + strlen(cache->fqn) + 1; > + > + /* Wrap names with spaces in single quotes */ > + if (strstr(cache->fqn, " ")) { > + format = "%c#'%s'"; > + len += 2; > + } else { > + format = "%c#%s"; > + } > + > + name = malloc(len); > + if (!name) { > + error("malloc failed"); > + return NULL; > + } > + > + if (snprintf(name, len, format, prefix, cache->fqn) >= len) { > + error("snprintf failed for '%s' (length %zu)", cache->fqn, > + len); > + free(name); > + return NULL; > + } This could be simplified considerably: const char *quote = strstr(cache->fqn, " ") != NULL ? "'" : ""; if (asprintf(&name, "%c#%s%s%s", prefix, quote, cache->fqn, quote) < 0) [...] 
> + > + return name; > +} > + > +static int __type_expand(struct die *cache, struct type_expansion *type, > + bool recursive); > + > +static int type_expand_child(struct die *cache, struct type_expansion *type, > + bool recursive) > +{ > + struct type_expansion child; > + char *name; > + > + name = get_type_name(cache); > + if (!name) > + return check(__type_expand(cache, type, recursive)); > + > + if (recursive && !__cache_was_expanded(&expansion_cache, cache->addr)) { > + check(__cache_mark_expanded(&expansion_cache, cache->addr)); > + check(type_expansion_init(&child, true)); > + check(__type_expand(cache, &child, true)); > + check(type_map_add(name, &child)); > + type_expansion_free(&child); > + } > + > + check(type_expansion_append(type, name, name)); > + return 0; > +} > + > +static int __type_expand(struct die *cache, struct type_expansion *type, > + bool recursive) > +{ > + struct die_fragment *df = cache->list; > + struct die *child; > + > + while (df) { > + switch (df->type) { > + case STRING: > + check(type_expansion_append(type, df->data.str, NULL)); > + break; > + case DIE: > + /* Use a complete die_map expansion if available */ > + if (__die_map_get(df->data.addr, COMPLETE, &child) && > + __die_map_get(df->data.addr, UNEXPANDED, &child)) { > + error("unknown child: %" PRIxPTR, > + df->data.addr); > + return -1; > + } > + > + check(type_expand_child(child, type, recursive)); > + break; > + case LINEBREAK: > + /* > + * Keep whitespace in the symtypes format, but avoid > + * repeated spaces. 
> + */ > + if (!df->next || df->next->type != LINEBREAK) > + check(type_expansion_append(type, " ", NULL)); > + break; > + default: > + error("empty die_fragment in %p", cache); > + return -1; > + } > + > + df = df->next; > + } > + > + return 0; > +} > + > +static int type_expand(struct die *cache, struct type_expansion *type, > + bool recursive) > +{ > + check(type_expansion_init(type, true)); > + check(__type_expand(cache, type, recursive)); > + cache_clear_expanded(&expansion_cache); > + return 0; > +} > + > +static int expand_type(struct die *cache, void *arg) > +{ > + struct type_expansion type; > + char *name; > + > + /* > + * Skip unexpanded die_map entries if there's a complete > + * expansion available for this DIE. > + */ > + if (cache->state == UNEXPANDED) > + __die_map_get(cache->addr, COMPLETE, &cache); > + > + if (cache->mapped) > + return 0; > + > + cache->mapped = true; > + > + name = get_type_name(cache); > + if (!name) > + return 0; > + > + debug("%s", name); > + check(type_expand(cache, &type, true)); > + check(type_map_add(name, &type)); > + > + type_expansion_free(&type); > + free(name); > + > + return 0; > +} > + > +int generate_symtypes(FILE *file) > +{ > + hash_init(expansion_cache.cache); > + > + /* > + * die_map processing: > + * > + * 1. die_map contains all types referenced in exported symbol > + * signatures, but can contain duplicates just like the original > + * DWARF, and some references may not be fully expanded depending > + * on how far we processed the DIE tree for that specific symbol. > + * > + * For each die_map entry, find the longest available expansion, > + * and add it to type_map. > + */ > + check(die_map_for_each(expand_type, NULL)); > + > + /* > + * 2. If a symtypes file is requested, write type_map contents to > + * the file. > + */ > + check(type_map_write(file)); > + type_map_free(); > + > + return 0; > +} -- Thanks, Petr