Re: [PATCH 13/15] modpost: Add support for hashing long symbol names

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Jun 18, 2024 at 2:58 AM Sami Tolvanen <samitolvanen@xxxxxxxxxx> wrote:
>
> Rust frequently has symbol names longer than MODULE_NAME_LEN, because
> the full namespace path is encoded into the mangled name. Instead of
> modpost failing when running into a long name with CONFIG_MODVERSIONS,
> store a hash of the name in struct modversion_info.
>
> To avoid breaking userspace tools that parse the __versions array,
> include a null-terminated hash function name at the beginning of the
> name string, followed by a binary hash. In order to keep .mod.c files
> more human-readable, add a comment with the full symbol name before the
> array entry. Example output in rust_minimal.mod.c:
>
>   static const struct modversion_info ____versions[]
>   __used __section("__versions") = {
>         /* _RNvNtNtCs1cdwasc6FUb_6kernel5print14format_strings4INFO */
>         { 0x9ec5442f, "sha256\x00\x56\x96\xf4\x27\xdb\x4a\xbf[...]" },
>         { 0x1d6595b1, "_RNvNtCs1cdwasc6FUb_6kernel5print11call_printk" },
>         { 0x3c642974, "__rust_dealloc" },
>         ...
>   };
>
> modprobe output for the resulting module:
>
>   $ modprobe --dump-modversions rust_minimal.ko
>   0x9ec5442f    sha256
>   0x1d6595b1    _RNvNtCs1cdwasc6FUb_6kernel5print11call_printk
>   0x3c642974    __rust_dealloc
>   ...
>
> While the output is less useful, the tool continues to work and can be
> later updated to produce more helpful output for hashed symbols.
>
> Note that this patch adds a generic SHA-256 implementation to modpost
> adapted from the Crypto API, but other hash functions may be used in
> future if needed.
>
> Suggested-by: Matthew Maurer <mmaurer@xxxxxxxxxx>
> Signed-off-by: Sami Tolvanen <samitolvanen@xxxxxxxxxx>
> ---
>  scripts/mod/Makefile  |   4 +-
>  scripts/mod/modpost.c |  20 ++-
>  scripts/mod/modpost.h |  20 +++
>  scripts/mod/symhash.c | 327 ++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 364 insertions(+), 7 deletions(-)
>  create mode 100644 scripts/mod/symhash.c
>
> diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile
> index c729bc936bae..ddd59ea9027e 100644
> --- a/scripts/mod/Makefile
> +++ b/scripts/mod/Makefile
> @@ -4,7 +4,7 @@ CFLAGS_REMOVE_empty.o += $(CC_FLAGS_LTO)
>  hostprogs-always-y     += modpost mk_elfconfig
>  always-y               += empty.o
>
> -modpost-objs   := modpost.o file2alias.o sumversion.o symsearch.o
> +modpost-objs   := modpost.o file2alias.o symhash.o sumversion.o symsearch.o
>
>  devicetable-offsets-file := devicetable-offsets.h
>
> @@ -15,7 +15,7 @@ targets += $(devicetable-offsets-file) devicetable-offsets.s
>
>  # dependencies on generated files need to be listed explicitly
>
> -$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o $(obj)/symsearch.o: $(obj)/elfconfig.h
> +$(obj)/modpost.o $(obj)/file2alias.o $(obj)/symhash.o $(obj)/sumversion.o $(obj)/symsearch.o: $(obj)/elfconfig.h
>  $(obj)/file2alias.o: $(obj)/$(devicetable-offsets-file)
>
>  quiet_cmd_elfconfig = MKELF   $@
> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
> index f48d72d22dc2..2631e3e75a5c 100644
> --- a/scripts/mod/modpost.c
> +++ b/scripts/mod/modpost.c
> @@ -1900,7 +1900,10 @@ static void add_exported_symbols(struct buffer *buf, struct module *mod)
>   **/
>  static void add_versions(struct buffer *b, struct module *mod)
>  {
> +       char hash[SYMHASH_STR_LEN];
>         struct symbol *s;
> +       const char *name;
> +       size_t len;
>
>         if (!modversions)
>                 return;
> @@ -1917,13 +1920,20 @@ static void add_versions(struct buffer *b, struct module *mod)
>                                 s->name, mod->name);
>                         continue;
>                 }
> -               if (strlen(s->name) >= MODULE_NAME_LEN) {
> -                       error("too long symbol \"%s\" [%s.ko]\n",
> -                             s->name, mod->name);
> -                       break;
> +               len = strlen(s->name);
> +               /*
> +                * For symbols with a long name, use the hash format, but include
> +                * the full symbol name as a comment to keep the generated files
> +                * human-readable.
> +                */
> +               if (len >= MODULE_NAME_LEN) {
> +                       buf_printf(b, "\t/* %s */\n", s->name);
> +                       name = symhash_str(s->name, len, hash);
> +               } else {
> +                       name = s->name;
>                 }
>                 buf_printf(b, "\t{ %#8x, \"%s\" },\n",
> -                          s->crc, s->name);
> +                          s->crc, name);
>         }
>
>         buf_printf(b, "};\n");
> diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
> index ee43c7950636..cd2eb718936b 100644
> --- a/scripts/mod/modpost.h
> +++ b/scripts/mod/modpost.h
> @@ -183,6 +183,26 @@ void handle_moddevtable(struct module *mod, struct elf_info *info,
>                         Elf_Sym *sym, const char *symname);
>  void add_moddevtable(struct buffer *buf, struct module *mod);
>
> +/* symhash.c */
> +/*
> + * For symbol names longer than MODULE_NAME_LEN, encode a hash of the
> + * symbol name in version information as follows:
> + *
> + * <hash name>\0<binary hash of the symbol name>
> + *
> + * e.g. as a string in .mod.c files:
> + * "sha256\x00\xNN{32}"
> + *
> + * The string is null terminated after the hash name to avoid breaking
> + * userspace tools that parse the __versions table and don't understand
> + * the format.
> + */
> +#define SYMHASH_STR_PREFIX     "sha256\\x00"
> +#define SYMHASH_STR_PREFIX_LEN (sizeof(SYMHASH_STR_PREFIX) - 1)
> +#define SYMHASH_STR_LEN                (SYMHASH_STR_PREFIX_LEN + 4*32 + 1)
> +
> +char *symhash_str(const char *name, size_t len, char hash_str[SYMHASH_STR_LEN]);
> +
>  /* sumversion.c */
>  void get_src_version(const char *modname, char sum[], unsigned sumlen);
>
> diff --git a/scripts/mod/symhash.c b/scripts/mod/symhash.c
> new file mode 100644
> index 000000000000..d0c9cf5f1f6c
> --- /dev/null
> +++ b/scripts/mod/symhash.c
> @@ -0,0 +1,327 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * symhash.c
> + *
> + * Symbol name hashing using a SHA-256 implementation adapted from the
> + * Cryptographic API.
> + */
> +#include <byteswap.h>
> +#include "modpost.h"
> +
> +#if HOST_ELFDATA == ELFDATA2MSB
> +/* Big endian */
> +#define be32_to_cpu(val) (val)
> +#define cpu_to_be32(val) (val)
> +#define cpu_to_be64(val) (val)
> +#else
> +/* Little endian */
> +#define be32_to_cpu(val) bswap_32(val)
> +#define cpu_to_be32(val) bswap_32(val)
> +#define cpu_to_be64(val) bswap_64(val)
> +#endif
> +
> +#define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory")




I know this is a copy-paste of the kernel space code,
but is barrier_data() also necessary for host programs?





> +
> +static inline void memzero_explicit(void *s, size_t count)
> +{
> +       memset(s, 0, count);
> +       barrier_data(s);
> +}
> +



--
Best Regards
Masahiro Yamada





[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [Big List of Linux Books]

  Powered by Linux