On Tue, Jun 18, 2024 at 2:58 AM Sami Tolvanen <samitolvanen@xxxxxxxxxx> wrote: > > Rust frequently has symbol names longer than MODULE_NAME_LEN, because > the full namespace path is encoded into the mangled name. Instead of > modpost failing when running into a long name with CONFIG_MODVERSIONS, > store a hash of the name in struct modversion_info. > > To avoid breaking userspace tools that parse the __versions array, > include a null-terminated hash function name at the beginning of the > name string, followed by a binary hash. In order to keep .mod.c files > more human-readable, add a comment with the full symbol name before the > array entry. Example output in rust_minimal.mod.c: > > static const struct modversion_info ____versions[] > __used __section("__versions") = { > /* _RNvNtNtCs1cdwasc6FUb_6kernel5print14format_strings4INFO */ > { 0x9ec5442f, "sha256\x00\x56\x96\xf4\x27\xdb\x4a\xbf[...]" }, > { 0x1d6595b1, "_RNvNtCs1cdwasc6FUb_6kernel5print11call_printk" }, > { 0x3c642974, "__rust_dealloc" }, > ... > }; > > modprobe output for the resulting module: > > $ modprobe --dump-modversions rust_minimal.ko > 0x9ec5442f sha256 > 0x1d6595b1 _RNvNtCs1cdwasc6FUb_6kernel5print11call_printk > 0x3c642974 __rust_dealloc > ... > > While the output is less useful, the tool continues to work and can be > later updated to produce more helpful output for hashed symbols. > > Note that this patch adds a generic SHA-256 implementation to modpost > adapted from the Crypto API, but other hash functions may be used in > future if needed. 
> > Suggested-by: Matthew Maurer <mmaurer@xxxxxxxxxx> > Signed-off-by: Sami Tolvanen <samitolvanen@xxxxxxxxxx> > --- > scripts/mod/Makefile | 4 +- > scripts/mod/modpost.c | 20 ++- > scripts/mod/modpost.h | 20 +++ > scripts/mod/symhash.c | 327 ++++++++++++++++++++++++++++++++++++++++++ > 4 files changed, 364 insertions(+), 7 deletions(-) > create mode 100644 scripts/mod/symhash.c > > diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile > index c729bc936bae..ddd59ea9027e 100644 > --- a/scripts/mod/Makefile > +++ b/scripts/mod/Makefile > @@ -4,7 +4,7 @@ CFLAGS_REMOVE_empty.o += $(CC_FLAGS_LTO) > hostprogs-always-y += modpost mk_elfconfig > always-y += empty.o > > -modpost-objs := modpost.o file2alias.o sumversion.o symsearch.o > +modpost-objs := modpost.o file2alias.o symhash.o sumversion.o symsearch.o > > devicetable-offsets-file := devicetable-offsets.h > > @@ -15,7 +15,7 @@ targets += $(devicetable-offsets-file) devicetable-offsets.s > > # dependencies on generated files need to be listed explicitly > > -$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o $(obj)/symsearch.o: $(obj)/elfconfig.h > +$(obj)/modpost.o $(obj)/file2alias.o $(obj)/symhash.o $(obj)/sumversion.o $(obj)/symsearch.o: $(obj)/elfconfig.h > $(obj)/file2alias.o: $(obj)/$(devicetable-offsets-file) > > quiet_cmd_elfconfig = MKELF $@ > diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c > index f48d72d22dc2..2631e3e75a5c 100644 > --- a/scripts/mod/modpost.c > +++ b/scripts/mod/modpost.c > @@ -1900,7 +1900,10 @@ static void add_exported_symbols(struct buffer *buf, struct module *mod) > **/ > static void add_versions(struct buffer *b, struct module *mod) > { > + char hash[SYMHASH_STR_LEN]; > struct symbol *s; > + const char *name; > + size_t len; > > if (!modversions) > return; > @@ -1917,13 +1920,20 @@ static void add_versions(struct buffer *b, struct module *mod) > s->name, mod->name); > continue; > } > - if (strlen(s->name) >= MODULE_NAME_LEN) { > - error("too long symbol \"%s\" 
[%s.ko]\n", > - s->name, mod->name); > - break; > + len = strlen(s->name); > + /* > + * For symbols with a long name, use the hash format, but include > + * the full symbol name as a comment to keep the generated files > + * human-readable. > + */ > + if (len >= MODULE_NAME_LEN) { > + buf_printf(b, "\t/* %s */\n", s->name); > + name = symhash_str(s->name, len, hash); > + } else { > + name = s->name; > } > buf_printf(b, "\t{ %#8x, \"%s\" },\n", > - s->crc, s->name); > + s->crc, name); > } > > buf_printf(b, "};\n"); > diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h > index ee43c7950636..cd2eb718936b 100644 > --- a/scripts/mod/modpost.h > +++ b/scripts/mod/modpost.h > @@ -183,6 +183,26 @@ void handle_moddevtable(struct module *mod, struct elf_info *info, > Elf_Sym *sym, const char *symname); > void add_moddevtable(struct buffer *buf, struct module *mod); > > +/* symhash.c */ > +/* > + * For symbol names longer than MODULE_NAME_LEN, encode a hash of the > + * symbol name in version information as follows: > + * > + * <hash name>\0<binary hash of the symbol name> > + * > + * e.g. as a string in .mod.c files: > + * "sha256\x00\xNN{32}" > + * > + * The string is null terminated after the hash name to avoid breaking > + * userspace tools that parse the __versions table and don't understand > + * the format. 
> + */ > +#define SYMHASH_STR_PREFIX "sha256\\x00" > +#define SYMHASH_STR_PREFIX_LEN (sizeof(SYMHASH_STR_PREFIX) - 1) > +#define SYMHASH_STR_LEN (SYMHASH_STR_PREFIX_LEN + 4*32 + 1) > + > +char *symhash_str(const char *name, size_t len, char hash_str[SYMHASH_STR_LEN]); > + > /* sumversion.c */ > void get_src_version(const char *modname, char sum[], unsigned sumlen); > > diff --git a/scripts/mod/symhash.c b/scripts/mod/symhash.c > new file mode 100644 > index 000000000000..d0c9cf5f1f6c > --- /dev/null > +++ b/scripts/mod/symhash.c > @@ -0,0 +1,327 @@ > +// SPDX-License-Identifier: GPL-2.0-or-later > +/* > + * symhash.c > + * > + * Symbol name hashing using a SHA-256 implementation adapted from the > + * Cryptographic API. > + */ > +#include <byteswap.h> > +#include "modpost.h" > + > +#if HOST_ELFDATA == ELFDATA2MSB > +/* Big endian */ > +#define be32_to_cpu(val) (val) > +#define cpu_to_be32(val) (val) > +#define cpu_to_be64(val) (val) > +#else > +/* Little endian */ > +#define be32_to_cpu(val) bswap_32(val) > +#define cpu_to_be32(val) bswap_32(val) > +#define cpu_to_be64(val) bswap_64(val) > +#endif > + > +#define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory")

I know this is a copy-paste of the kernel space code, but is barrier_data() also necessary for host programs?

> + > +static inline void memzero_explicit(void *s, size_t count) > +{ > + memset(s, 0, count); > + barrier_data(s); > +} > +

-- Best Regards Masahiro Yamada