Hi Jack, On RISC-V builds, we noticed a recent slowdown after commit ddb5cdbafaaa ("kbuild: generate KSYMTAB entries by modpost") was introduced. We tracked it down to find_nearest_sym() being called a lot; more specifically, it is slow for us because we have a lot of local symbols that are generated as part of PCREL accesses (even more when building in debug mode; we measured a count of 12964362 symbols in one vmlinux.o). Without your changes, for a typical riscv defconfig + debug build, modpost took the following amount of time: $ time scripts/mod/modpost -M -o Module.symvers -T modules.order vmlinux.o real 4m21,976s user 4m21,803s sys 0m0,100s With your changes: $ time scripts/mod/modpost -M -o Module.symvers -T modules.order vmlinux.o real 0m1,077s user 0m0,980s sys 0m0,095s I guess you could further optimize it by allocating a binary tree for each section: since find_nearest_sym() searches for symbols in a specific section, that would save a few comparisons. Not sure it would be much faster or simpler to implement, though. FWIW: Tested-by: Clément Léger <cleger@xxxxxxxxxxxx> Thanks, Clément On 18/09/2023 23:06, Jack Brennen wrote: > Modify modpost to use binary search for converting addresses back > into symbol references. Previously it used linear search. > > This change saves a few seconds of wall time for defconfig builds, > but can save several minutes on allyesconfigs. 
> > Before: > $ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error" > Elapsed (wall clock) time (h:mm:ss or m:ss): 13:30.31 > > After: > $ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error" > Elapsed (wall clock) time (h:mm:ss or m:ss): 11:43.43 > > Signed-off-by: Jack Brennen <jbrennen@xxxxxxxxxx> > Tested-by: Nick Desaulniers <ndesaulniers@xxxxxxxxxx> > --- > scripts/mod/Makefile | 4 +- > scripts/mod/modpost.c | 60 +---------- > scripts/mod/modpost.h | 25 +++++ > scripts/mod/symsearch.c | 233 ++++++++++++++++++++++++++++++++++++++++ > 4 files changed, 265 insertions(+), 57 deletions(-) > create mode 100644 scripts/mod/symsearch.c > > diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile > index c9e38ad937fd..3c54125eb373 100644 > --- a/scripts/mod/Makefile > +++ b/scripts/mod/Makefile > @@ -5,7 +5,7 @@ CFLAGS_REMOVE_empty.o += $(CC_FLAGS_LTO) > hostprogs-always-y += modpost mk_elfconfig > always-y += empty.o > > -modpost-objs := modpost.o file2alias.o sumversion.o > +modpost-objs := modpost.o file2alias.o sumversion.o symsearch.o > > devicetable-offsets-file := devicetable-offsets.h > > @@ -16,7 +16,7 @@ targets += $(devicetable-offsets-file) devicetable-offsets.s > > # dependencies on generated files need to be listed explicitly > > -$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o: $(obj)/elfconfig.h > +$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o $(obj)/symsearch.o: $(obj)/elfconfig.h > $(obj)/file2alias.o: $(obj)/$(devicetable-offsets-file) > > quiet_cmd_elfconfig = MKELF $@ > diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c > index de499dce5265..975f235aca2c 100644 > --- a/scripts/mod/modpost.c > +++ b/scripts/mod/modpost.c > @@ -22,7 +22,6 @@ > #include <errno.h> > #include "modpost.h" > #include "../../include/linux/license.h" > -#include "../../include/linux/module_symbol.h" > > static bool module_enabled; > /* Are we using CONFIG_MODVERSIONS? 
*/ > @@ -577,11 +576,14 @@ static int parse_elf(struct elf_info *info, const char *filename) > *p = TO_NATIVE(*p); > } > > + symsearch_init(info); > + > return 1; > } > > static void parse_elf_finish(struct elf_info *info) > { > + symsearch_finish(info); > release_file(info->hdr, info->size); > } > > @@ -1039,65 +1041,13 @@ static int secref_whitelist(const char *fromsec, const char *fromsym, > return 1; > } > > -/* > - * If there's no name there, ignore it; likewise, ignore it if it's > - * one of the magic symbols emitted used by current tools. > - * > - * Otherwise if find_symbols_between() returns those symbols, they'll > - * fail the whitelist tests and cause lots of false alarms ... fixable > - * only by merging __exit and __init sections into __text, bloating > - * the kernel (which is especially evil on embedded platforms). > - */ > -static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym) > -{ > - const char *name = elf->strtab + sym->st_name; > - > - if (!name || !strlen(name)) > - return 0; > - return !is_mapping_symbol(name); > -} > - > /* Look up the nearest symbol based on the section and the address */ > static Elf_Sym *find_nearest_sym(struct elf_info *elf, Elf_Addr addr, > unsigned int secndx, bool allow_negative, > Elf_Addr min_distance) > { > - Elf_Sym *sym; > - Elf_Sym *near = NULL; > - Elf_Addr sym_addr, distance; > - bool is_arm = (elf->hdr->e_machine == EM_ARM); > - > - for (sym = elf->symtab_start; sym < elf->symtab_stop; sym++) { > - if (get_secindex(elf, sym) != secndx) > - continue; > - if (!is_valid_name(elf, sym)) > - continue; > - > - sym_addr = sym->st_value; > - > - /* > - * For ARM Thumb instruction, the bit 0 of st_value is set > - * if the symbol is STT_FUNC type. Mask it to get the address. 
> - */ > - if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC) > - sym_addr &= ~1; > - > - if (addr >= sym_addr) > - distance = addr - sym_addr; > - else if (allow_negative) > - distance = sym_addr - addr; > - else > - continue; > - > - if (distance <= min_distance) { > - min_distance = distance; > - near = sym; > - } > - > - if (min_distance == 0) > - break; > - } > - return near; > + return symsearch_find_nearest(elf, addr, secndx, > + allow_negative, min_distance); > } > > static Elf_Sym *find_fromsym(struct elf_info *elf, Elf_Addr addr, > diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h > index 5f94c2c9f2d9..6413f26fcb6b 100644 > --- a/scripts/mod/modpost.h > +++ b/scripts/mod/modpost.h > @@ -10,6 +10,7 @@ > #include <fcntl.h> > #include <unistd.h> > #include <elf.h> > +#include "../../include/linux/module_symbol.h" > > #include "list.h" > #include "elfconfig.h" > @@ -128,6 +129,8 @@ struct elf_info { > * take shndx from symtab_shndx_start[N] instead */ > Elf32_Word *symtab_shndx_start; > Elf32_Word *symtab_shndx_stop; > + > + struct symsearch *symsearch; > }; > > /* Accessor for sym->st_shndx, hides ugliness of "64k sections" */ > @@ -154,6 +157,28 @@ static inline unsigned int get_secindex(const struct elf_info *info, > return index; > } > > +/* > + * If there's no name there, ignore it; likewise, ignore it if it's > + * one of the magic symbols emitted used by current tools. > + * > + * Internal symbols created by tools should be ignored by modpost. 
> + */ > +static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym) > +{ > + const char *name = elf->strtab + sym->st_name; > + > + if (!name || !strlen(name)) > + return 0; > + return !is_mapping_symbol(name); > +} > + > +/* symsearch.c */ > +void symsearch_init(struct elf_info *elf); > +void symsearch_finish(struct elf_info *elf); > +Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr, > + unsigned int secndx, bool allow_negative, > + Elf_Addr min_distance); > + > /* file2alias.c */ > void handle_moddevtable(struct module *mod, struct elf_info *info, > Elf_Sym *sym, const char *symname); > diff --git a/scripts/mod/symsearch.c b/scripts/mod/symsearch.c > new file mode 100644 > index 000000000000..aab79262512b > --- /dev/null > +++ b/scripts/mod/symsearch.c > @@ -0,0 +1,233 @@ > +// SPDX-License-Identifier: GPL-2.0 > + > +/* Helper functions for finding the symbol in an ELF which is "nearest" > + * to a given address. > + */ > + > +#include "modpost.h" > + > +/* Struct used for binary search. */ > +struct syminfo { > + unsigned int symbol_index; > + unsigned int section_index; > + Elf_Addr addr; > +}; > + > +/* Container used to hold an entire binary search table. > + * Entries in table are ascending, sorted first by section_index, > + * then by addr, and last by symbol_index. The sorting by > + * symbol_index is used to duplicate the quirks of the prior > + * find_nearest_sym() function, where exact matches to an address > + * return the first symtab entry seen, but near misses return the > + * last symtab entry seen. > + * The first and last entries of the table are sentinels and their > + * values only matter in two places: when we sort the table, and > + * on lookups, the end sentinel should not have an addr field which > + * matches its immediate predecessor. To meet these requirements, > + * we initialize them to (0,0,0) and (max,max,max), and then after > + * sorting, we tweak the end sentinel's addr field accordingly. 
> + */ > +struct symsearch { > + size_t table_size; > + struct syminfo table[]; > +}; > + > +static inline bool is_sym_searchable(struct elf_info *elf, Elf_Sym *sym) > +{ > + return is_valid_name(elf, sym) != 0; > +} > + > +static int syminfo_compare(const void *s1, const void *s2) > +{ > + const struct syminfo *sym1 = s1; > + const struct syminfo *sym2 = s2; > + > + if (sym1->section_index > sym2->section_index) > + return 1; > + if (sym1->section_index < sym2->section_index) > + return -1; > + if (sym1->addr > sym2->addr) > + return 1; > + if (sym1->addr < sym2->addr) > + return -1; > + if (sym1->symbol_index > sym2->symbol_index) > + return 1; > + if (sym1->symbol_index < sym2->symbol_index) > + return -1; > + return 0; > +} > + > +static size_t symbol_count(struct elf_info *elf) > +{ > + size_t result = 0; > + > + for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) { > + if (is_sym_searchable(elf, sym)) > + result++; > + } > + return result; > +} > + > +/* Populate the search array that we just allocated. > + * Be slightly paranoid here. If the ELF file changes during processing, > + * or if the behavior of is_sym_searchable() changes during processing, > + * we want to catch it; neither of those is acceptable. 
> + */ > +static void symsearch_populate(struct elf_info *elf, > + struct syminfo *table, > + size_t table_size) > +{ > + bool is_arm = (elf->hdr->e_machine == EM_ARM); > + > + /* Start sentinel */ > + if (table_size-- == 0) > + fatal("%s: size mismatch\n", __func__); > + table->symbol_index = 0; > + table->section_index = 0; > + table->addr = 0; > + table++; > + > + for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) { > + if (is_sym_searchable(elf, sym)) { > + if (table_size-- == 0) > + fatal("%s: size mismatch\n", __func__); > + table->symbol_index = sym - elf->symtab_start; > + table->section_index = get_secindex(elf, sym); > + table->addr = sym->st_value; > + > + /* > + * For ARM Thumb instruction, the bit 0 of st_value is > + * set if the symbol is STT_FUNC type. Mask it to get > + * the address. > + */ > + if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC) > + table->addr &= ~1; > + > + table++; > + } > + } > + > + /* End sentinel; all values are unsigned so -1 wraps to max */ > + if (table_size != 1) > + fatal("%s: size mismatch\n", __func__); > + table->symbol_index = -1; > + table->section_index = -1; > + table->addr = -1; > +} > + > +void symsearch_init(struct elf_info *elf) > +{ > + /* +2 here to allocate space for the start and end sentinels */ > + size_t table_size = symbol_count(elf) + 2; > + > + elf->symsearch = NOFAIL(malloc( > + sizeof(struct symsearch) + > + sizeof(struct syminfo) * table_size)); > + elf->symsearch->table_size = table_size; > + > + symsearch_populate(elf, elf->symsearch->table, table_size); > + qsort(elf->symsearch->table, table_size, > + sizeof(struct syminfo), syminfo_compare); > + > + /* A bit of paranoia; make sure that the end sentinel's address is > + * different than its predecessor. Not doing this could cause > + * possible undefined behavior if anybody ever inserts a symbol > + * with section_index and addr both at their max values. 
> + * Doing this little bit of defensive programming is more efficient > + * than checking for array overruns later. > + */ > + elf->symsearch->table[table_size - 1].addr = > + elf->symsearch->table[table_size - 2].addr + 1; > +} > + > +void symsearch_finish(struct elf_info *elf) > +{ > + free(elf->symsearch); > + elf->symsearch = NULL; > +} > + > +/* Find the syminfo which is in secndx and "nearest" to addr. > + * allow_negative: allow returning a symbol whose address is > addr. > + * min_distance: ignore symbols which are further away than this. > + * > + * Returns a nonzero index into the symsearch table for success. > + * Returns NULL if no legal symbol is found within the requested range. > + */ > +static size_t symsearch_find_impl(struct elf_info *elf, Elf_Addr addr, > + unsigned int secndx, bool allow_negative, > + Elf_Addr min_distance) > +{ > + /* Find the target in the array; it will lie between two elements. > + * Invariant here: table[lo] < target <= table[hi] > + * For the purposes of search, exact hits in the search array are > + * considered greater than the target. This means that if we do > + * get an exact hit, then once the search terminates, table[hi] > + * will be the exact match which has the lowest symbol index. > + */ > + struct syminfo *table = elf->symsearch->table; > + size_t hi = elf->symsearch->table_size - 1; > + size_t lo = 0; > + bool hi_is_usable = false; > + bool lo_is_usable = false; > + Elf_Addr hi_distance = -1; // max Elf_Addr > + Elf_Addr lo_distance = -1; // max Elf_Addr > + Elf_Addr min_distance_lo = min_distance; > + Elf_Addr min_distance_hi = allow_negative ? 
min_distance : 0; > + > + for (;;) { > + size_t mid; > + > + mid = lo + (hi - lo) / 2; > + if (mid == lo) > + break; > + if (secndx > table[mid].section_index) { > + lo = mid; > + } else if (secndx < table[mid].section_index) { > + hi = mid; > + } else if (addr > table[mid].addr) { > + lo = mid; > + lo_distance = addr - table[mid].addr; > + lo_is_usable = (lo_distance <= min_distance_lo); > + } else { > + hi = mid; > + hi_distance = table[mid].addr - addr; > + hi_is_usable = (hi_distance <= min_distance_hi); > + } > + } > + > + if (hi_is_usable && lo_is_usable) { > + lo_is_usable = (lo_distance <= hi_distance); > + hi_is_usable = (hi_distance <= lo_distance); > + } > + > + if (!hi_is_usable) > + return lo_is_usable ? lo : 0; > + > + if (hi_distance == 0) > + return hi; > + > + /* Match quirks of existing behavior. Advance hi to the last > + * matching entry in the search table. We don't need to worry > + * about running off the end of the array due to the sentinel. > + */ > + while (table[hi+1].addr == table[hi].addr && > + table[hi+1].section_index == table[hi].section_index) { > + hi++; > + } > + > + return (lo_is_usable && > + table[lo].symbol_index > table[hi].symbol_index) ? lo : hi; > +} > + > +Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr, > + unsigned int secndx, bool allow_negative, > + Elf_Addr min_distance) > +{ > + size_t result = symsearch_find_impl(elf, addr, secndx, > + allow_negative, min_distance); > + > + if (result == 0) > + return NULL; > + > + return &elf->symtab_start[elf->symsearch->table[result].symbol_index]; > +}