On Thu, Feb 22, 2024 at 5:27 AM Jann Horn <jannh@xxxxxxxxxx> wrote: > > Currently, kallsyms builds a big assembly file (~19M with a normal > kernel config), and then the assembler has to turn that big assembly > file back into binary data, which takes around a second per kallsyms > invocation. (Normally there are two kallsyms invocations per build.) > > It is much faster to instead directly output binary data, which can > be imported in an assembly file using ".incbin". This is also the > approach taken by arch/x86/boot/compressed/mkpiggy.c. Yes, that is a sensible case because it just wraps the binary without any modification. > So this patch switches kallsyms to that approach. > > A complication with this is that the endianness of numbers between > host and target might not match (for example, when cross-compiling); > and there seems to be no kconfig symbol that tells us what endianness > the target has. CONFIG_CPU_BIG_ENDIAN is it. You could do this: if is_enabled CONFIG_CPU_BIG_ENDIAN; then kallsymopt="${kallsymopt} --big-endian" fi if is_enabled CONFIG_64BIT; then kallsymopt="${kallsymopt} --64bit" fi > So pass the path to the intermediate vmlinux ELF file to the kallsyms > tool, and let it parse the ELF header to figure out the target's > endianness. > > I have verified that running kallsyms without these changes and > kallsyms with these changes on the same input System.map results > in identical object files. > > This change reduces the time for an incremental kernel rebuild > (touch fs/ioctl.c, then re-run make) from 27.7s to 24.1s (medians > over 16 runs each) on my machine - saving around 3.6 seconds. This reverts bea5b74504742f1b51b815bcaf9a70bddbc49ce3 Somebody might struggle with debugging again, but I am not sure. Arnd? If the effort were "I invented a way to do kallsyms in one pass instead of three", it would be so much more attractive. I am not so sure if this grain of the optimization is exciting, but I confirmed that a few seconds were saved for the defconfig. I am neutral about this. For the debugging purpose, perhaps we can add --debug option in order to leave the possibility for outputting the full assembly as comments. > > Signed-off-by: Jann Horn <jannh@xxxxxxxxxx> > --- > scripts/kallsyms.c | 196 ++++++++++++++++++++++++++++++++-------- > scripts/link-vmlinux.sh | 5 +- > 2 files changed, 159 insertions(+), 42 deletions(-) > > diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c > index f35be95adfbe..ef03d723aded 100644 > --- a/scripts/kallsyms.c > +++ b/scripts/kallsyms.c > @@ -27,6 +27,10 @@ > #include <string.h> > #include <ctype.h> > #include <limits.h> > +#include <endian.h> > +#include <elf.h> > +#include <fcntl.h> > +#include <unistd.h> > > #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) > > @@ -75,7 +79,7 @@ static unsigned char best_table_len[256]; > static void usage(void) > { > fprintf(stderr, "Usage: kallsyms [--all-symbols] [--absolute-percpu] " > - "[--lto-clang] in.map > out.S\n"); > + "[--lto-clang] in in.map out.S out.bin\n"); > exit(1); > } > > @@ -290,20 +294,57 @@ static void read_map(const char *in) > fclose(fp); > } > > +static bool is_64bit, is_little_endian; > +static char *asm_path, *bin_path; > +static FILE *asm_file, *bin_file; > +static size_t bin_offset, bin_included; > + > static void output_label(const char *label) > { > - printf(".globl %s\n", label); > - printf("\tALGN\n"); > - printf("%s:\n", label); > + fprintf(asm_file, ".globl %s\n", label); > + fprintf(asm_file, "\tALGN\n"); > + fprintf(asm_file, "%s:\n", label); > } > > /* Provide proper symbols relocatability by their '_text' relativeness. */ > static void output_address(unsigned long long addr) > { > if (_text <= addr) > - printf("\tPTR\t_text + %#llx\n", addr - _text); > + fprintf(asm_file, "\tPTR\t_text + %#llx\n", addr - _text); > else > - printf("\tPTR\t_text - %#llx\n", _text - addr); > + fprintf(asm_file, "\tPTR\t_text - %#llx\n", _text - addr); > +} > + > +/* > + * Include all data that has been written into bin_file since the last call to > + * this function. > + */ > +static void include_bin_data(void) > +{ > + fprintf(asm_file, ".incbin \"%s\", %zu, %zu\n", bin_path, > + bin_included, bin_offset - bin_included); > + bin_included = bin_offset; > +} > + > +static void output_bin_data(const void *data, size_t len) > +{ > + if (fwrite(data, 1, len, bin_file) != len) { > + fprintf(stderr, "kallsyms: unable to write output\n"); > + exit(EXIT_FAILURE); > + } > + bin_offset += len; > +} > +static void output_bin_u32(uint32_t value) > +{ > + uint32_t encoded = is_little_endian ? htole32(value) : htobe32(value); > + > + output_bin_data(&encoded, sizeof(encoded)); > +} > +static void output_bin_u16(uint16_t value) You might want to insert a blank line between functions. > +{ > + uint16_t encoded = is_little_endian ? htole16(value) : htobe16(value); > + > + output_bin_data(&encoded, sizeof(encoded)); > } > > /* uncompress a compressed symbol. When this function is called, the best table > @@ -384,25 +425,36 @@ static void sort_symbols_by_name(void) > > static void write_src(void) > { > - unsigned int i, k, off; > + unsigned int i, off; > unsigned int best_idx[256]; > unsigned int *markers; > char buf[KSYM_NAME_LEN]; > > - printf("#include <asm/bitsperlong.h>\n"); > - printf("#if BITS_PER_LONG == 64\n"); > - printf("#define PTR .quad\n"); > - printf("#define ALGN .balign 8\n"); > - printf("#else\n"); > - printf("#define PTR .long\n"); > - printf("#define ALGN .balign 4\n"); > - printf("#endif\n"); > + asm_file = fopen(asm_path, "w"); > + if (!asm_file) { > + perror("unable to open asm output"); > + exit(EXIT_FAILURE); > + } > + bin_file = fopen(bin_path, "w"); > + if (!bin_file) { > + perror("unable to open bin output"); > + exit(EXIT_FAILURE); > + } > + > + fprintf(asm_file, "#include <asm/bitsperlong.h>\n"); > + fprintf(asm_file, "#if BITS_PER_LONG == 64\n"); > + fprintf(asm_file, "#define PTR .quad\n"); > + fprintf(asm_file, "#define ALGN .balign 8\n"); > + fprintf(asm_file, "#else\n"); > + fprintf(asm_file, "#define PTR .long\n"); > + fprintf(asm_file, "#define ALGN .balign 4\n"); > + fprintf(asm_file, "#endif\n"); With this patch, this tool will need to be aware whether the target is 64-bit or not. There is no point to include <asm/bitsperlong.h> to check BITS_PER_LONG. > > - printf("\t.section .rodata, \"a\"\n"); > + fprintf(asm_file, "\t.section .rodata, \"a\"\n"); > > output_label("kallsyms_num_syms"); > - printf("\t.long\t%u\n", table_cnt); > - printf("\n"); > + fprintf(asm_file, "\t.long\t%u\n", table_cnt); > + fprintf(asm_file, "\n"); > > /* table of offset markers, that give the offset in the compressed stream > * every 256 symbols */ > @@ -437,20 +489,23 @@ static void write_src(void) > /* Encode length with ULEB128. */ > if (table[i]->len <= 0x7F) { > /* Most symbols use a single byte for the length. */ > - printf("\t.byte 0x%02x", table[i]->len); > + unsigned char len_encoded[1] = { table[i]->len }; > + > + output_bin_data(len_encoded, sizeof(len_encoded)); > off += table[i]->len + 1; > } else { > /* "Big" symbols use two bytes. */ > - printf("\t.byte 0x%02x, 0x%02x", > + unsigned char len_encoded[2] = { > (table[i]->len & 0x7F) | 0x80, > - (table[i]->len >> 7) & 0x7F); > + (table[i]->len >> 7) & 0x7F > + }; > + > + output_bin_data(len_encoded, sizeof(len_encoded)); > off += table[i]->len + 2; > } > - for (k = 0; k < table[i]->len; k++) > - printf(", 0x%02x", table[i]->sym[k]); > - printf("\n"); > + output_bin_data(table[i]->sym, table[i]->len); > } > - printf("\n"); > + include_bin_data(); > > /* > * Now that we wrote out the compressed symbol names, restore the > @@ -463,8 +518,8 @@ static void write_src(void) > > output_label("kallsyms_markers"); > for (i = 0; i < ((table_cnt + 255) >> 8); i++) > - printf("\t.long\t%u\n", markers[i]); > - printf("\n"); > + output_bin_u32(markers[i]); > + include_bin_data(); > > free(markers); > > @@ -473,15 +528,15 @@ static void write_src(void) > for (i = 0; i < 256; i++) { > best_idx[i] = off; > expand_symbol(best_table[i], best_table_len[i], buf); > - printf("\t.asciz\t\"%s\"\n", buf); > + output_bin_data(buf, strlen(buf)+1); > off += strlen(buf) + 1; > } > - printf("\n"); > + include_bin_data(); > > output_label("kallsyms_token_index"); > for (i = 0; i < 256; i++) > - printf("\t.short\t%d\n", best_idx[i]); > - printf("\n"); > + output_bin_u16(best_idx[i]); > + include_bin_data(); > > output_label("kallsyms_offsets"); > > @@ -513,13 +568,12 @@ static void write_src(void) > table[i]->addr); > exit(EXIT_FAILURE); > } > - printf("\t.long\t%#x /* %s */\n", (int)offset, table[i]->sym); > + output_bin_u32((uint32_t)offset); > } > - printf("\n"); > + include_bin_data(); > > output_label("kallsyms_relative_base"); > output_address(relative_base); > - printf("\n"); > > if (lto_clang) > for (i = 0; i < table_cnt; i++) > @@ -527,12 +581,24 @@ static void write_src(void) > > sort_symbols_by_name(); > output_label("kallsyms_seqs_of_names"); > - for (i = 0; i < table_cnt; i++) > - printf("\t.byte 0x%02x, 0x%02x, 0x%02x\n", > + for (i = 0; i < table_cnt; i++) { > + unsigned char seq_encoded[3] = { > (unsigned char)(table[i]->seq >> 16), > (unsigned char)(table[i]->seq >> 8), > - (unsigned char)(table[i]->seq >> 0)); > - printf("\n"); > + (unsigned char)(table[i]->seq >> 0) > + }; > + output_bin_data(seq_encoded, sizeof(seq_encoded)); > + } > + include_bin_data(); > + > + if (fclose(asm_file)) { > + perror("unable to write to asm output"); > + exit(EXIT_FAILURE); > + } > + if (fclose(bin_file)) { > + perror("unable to write to bin output"); > + exit(EXIT_FAILURE); > + } > } > > > @@ -795,6 +861,52 @@ static void record_relative_base(void) > } > } > > +static void get_target_data_types(const char *elf_path) > +{ > + int elf_fd = open(elf_path, O_RDONLY); > + unsigned char elf_ident[EI_NIDENT]; > + > + if (elf_fd == -1) { > + perror("open ELF"); > + exit(EXIT_FAILURE); > + } > + if (read(elf_fd, elf_ident, sizeof(elf_ident)) != sizeof(elf_ident)) { > + perror("read ELF header"); > + exit(EXIT_FAILURE); > + } > + close(elf_fd); > + > + if (elf_ident[EI_MAG0] != ELFMAG0 || elf_ident[EI_MAG1] != ELFMAG1 || > + elf_ident[EI_MAG2] != ELFMAG2 || elf_ident[EI_MAG3] != ELFMAG3) { > + fprintf(stderr, "kallsyms: input ELF has invalid header\n"); > + exit(EXIT_FAILURE); > + } > + > + switch (elf_ident[EI_CLASS]) { > + case ELFCLASS32: > + is_64bit = false; > + break; > + case ELFCLASS64: > + is_64bit = true; > + break; > + default: > + fprintf(stderr, "kallsyms: input ELF has invalid bitness\n"); > + exit(EXIT_FAILURE); > + } > + > + switch (elf_ident[EI_DATA]) { > + case ELFDATA2LSB: > + is_little_endian = true; > + break; > + case ELFDATA2MSB: > + is_little_endian = false; > + break; > + default: > + fprintf(stderr, "kallsyms: input ELF has invalid endianness\n"); > + exit(EXIT_FAILURE); > + } > +} > + > int main(int argc, char **argv) > { > while (1) { > @@ -813,10 +925,14 @@ int main(int argc, char **argv) > usage(); > } > > - if (optind >= argc) > + if (optind+4 != argc) > usage(); > + asm_path = argv[optind+2]; > + bin_path = argv[optind+3]; > + > + get_target_data_types(argv[optind]); > > - read_map(argv[optind]); > + read_map(argv[optind+1]); > shrink_table(); > if (absolute_percpu) > make_percpus_absolute(); > diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh > index 5127371d3393..1b5ff33a2d4a 100755 > --- a/scripts/link-vmlinux.sh > +++ b/scripts/link-vmlinux.sh > @@ -162,7 +162,7 @@ kallsyms() > fi > > info KSYMS ${2} > - scripts/kallsyms ${kallsymopt} ${1} > ${2} > + scripts/kallsyms ${kallsymopt} ${1} ${2} ${3} ${4} > } > > # Perform one step in kallsyms generation, including temporary linking of > @@ -173,10 +173,11 @@ kallsyms_step() > kallsyms_vmlinux=.tmp_vmlinux.kallsyms${1} > kallsymso=${kallsyms_vmlinux}.o > kallsyms_S=${kallsyms_vmlinux}.S > + kallsyms_bin=${kallsyms_vmlinux}.bin > > vmlinux_link ${kallsyms_vmlinux} "${kallsymso_prev}" ${btf_vmlinux_bin_o} > mksysmap ${kallsyms_vmlinux} ${kallsyms_vmlinux}.syms ${kallsymso_prev} > - kallsyms ${kallsyms_vmlinux}.syms ${kallsyms_S} > + kallsyms ${kallsyms_vmlinux} ${kallsyms_vmlinux}.syms ${kallsyms_S} ${kallsyms_bin} > > info AS ${kallsyms_S} > ${CC} ${NOSTDINC_FLAGS} ${LINUXINCLUDE} ${KBUILD_CPPFLAGS} \ > -- > 2.44.0.rc0.258.g7320e95886-goog > -- Best Regards Masahiro Yamada