On 11/21/2024 9:42 PM, Sami Tolvanen wrote:
Add a basic DWARF parser, which uses libdw to traverse the debugging
information in an object file and looks for functions and variables.
In follow-up patches, this will be expanded to produce symbol versions
for CONFIG_MODVERSIONS from DWARF.
Signed-off-by: Sami Tolvanen <samitolvanen@xxxxxxxxxx>
Reviewed-by: Petr Pavlu <petr.pavlu@xxxxxxxx>
---
kernel/module/Kconfig | 8 ++
scripts/Makefile | 1 +
scripts/gendwarfksyms/.gitignore | 2 +
scripts/gendwarfksyms/Makefile | 8 ++
scripts/gendwarfksyms/dwarf.c | 166 ++++++++++++++++++++++++++
scripts/gendwarfksyms/gendwarfksyms.c | 126 +++++++++++++++++++
scripts/gendwarfksyms/gendwarfksyms.h | 100 ++++++++++++++++
scripts/gendwarfksyms/symbols.c | 96 +++++++++++++++
8 files changed, 507 insertions(+)
create mode 100644 scripts/gendwarfksyms/.gitignore
create mode 100644 scripts/gendwarfksyms/Makefile
create mode 100644 scripts/gendwarfksyms/dwarf.c
create mode 100644 scripts/gendwarfksyms/gendwarfksyms.c
create mode 100644 scripts/gendwarfksyms/gendwarfksyms.h
create mode 100644 scripts/gendwarfksyms/symbols.c
diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig
index 7c6588148d42..f9e5f82fa88b 100644
--- a/kernel/module/Kconfig
+++ b/kernel/module/Kconfig
@@ -169,6 +169,14 @@ config MODVERSIONS
make them incompatible with the kernel you are running. If
unsure, say N.
+config GENDWARFKSYMS
+ bool
+ depends on DEBUG_INFO
+ # Requires full debugging information, split DWARF not supported.
+ depends on !DEBUG_INFO_REDUCED && !DEBUG_INFO_SPLIT
+ # Requires ELF object files.
+ depends on !LTO
+
config ASM_MODVERSIONS
bool
default HAVE_ASM_MODVERSIONS && MODVERSIONS
diff --git a/scripts/Makefile b/scripts/Makefile
index 6bcda4b9d054..d7fec46d38c0 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -54,6 +54,7 @@ targets += module.lds
subdir-$(CONFIG_GCC_PLUGINS) += gcc-plugins
subdir-$(CONFIG_MODVERSIONS) += genksyms
+subdir-$(CONFIG_GENDWARFKSYMS) += gendwarfksyms
subdir-$(CONFIG_SECURITY_SELINUX) += selinux
subdir-$(CONFIG_SECURITY_IPE) += ipe
diff --git a/scripts/gendwarfksyms/.gitignore b/scripts/gendwarfksyms/.gitignore
new file mode 100644
index 000000000000..0927f8d3cd96
--- /dev/null
+++ b/scripts/gendwarfksyms/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+/gendwarfksyms
diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile
new file mode 100644
index 000000000000..9f8fec4fd39b
--- /dev/null
+++ b/scripts/gendwarfksyms/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+hostprogs-always-y += gendwarfksyms
+
+gendwarfksyms-objs += gendwarfksyms.o
+gendwarfksyms-objs += dwarf.o
+gendwarfksyms-objs += symbols.o
+
+HOSTLDLIBS_gendwarfksyms := -ldw -lelf
diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
new file mode 100644
index 000000000000..81df3e2ad3ae
--- /dev/null
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Google LLC
+ */
+
+#include "gendwarfksyms.h"
+
+static bool get_ref_die_attr(Dwarf_Die *die, unsigned int id, Dwarf_Die *value)
+{
+ Dwarf_Attribute da;
+
+ /* dwarf_formref_die returns a pointer instead of an error value. */
+ return dwarf_attr(die, id, &da) && dwarf_formref_die(&da, value);
+}
+
+#define DEFINE_GET_STRING_ATTR(attr) \
+ static const char *get_##attr##_attr(Dwarf_Die *die) \
+ { \
+ Dwarf_Attribute da; \
+ if (dwarf_attr(die, DW_AT_##attr, &da)) \
+ return dwarf_formstring(&da); \
+ return NULL; \
+ }
+
+DEFINE_GET_STRING_ATTR(name)
+DEFINE_GET_STRING_ATTR(linkage_name)
+
+static const char *get_symbol_name(Dwarf_Die *die)
+{
+ const char *name;
+
+ /* rustc uses DW_AT_linkage_name for exported symbols */
+ name = get_linkage_name_attr(die);
+ if (!name)
+ name = get_name_attr(die);
+
+ return name;
+}
+
+static bool match_export_symbol(struct state *state, Dwarf_Die *die)
+{
+ Dwarf_Die *source = die;
+ Dwarf_Die origin;
+
+ /* If the DIE has an abstract origin, use it for type information. */
+ if (get_ref_die_attr(die, DW_AT_abstract_origin, &origin))
+ source = &origin;
+
+ state->sym = symbol_get(get_symbol_name(die));
+
+ /* Look up using the origin name if there are no matches. */
+ if (!state->sym && source != die)
+ state->sym = symbol_get(get_symbol_name(source));
+
+ state->die = *source;
+ return !!state->sym;
+}
+
+/*
+ * Type string processing
+ */
+static void process(const char *s)
+{
+ s = s ?: "<null>";
+
+ if (dump_dies)
+ fputs(s, stderr);
+}
+
+bool match_all(Dwarf_Die *die)
+{
+ return true;
+}
+
+int process_die_container(struct state *state, Dwarf_Die *die,
+ die_callback_t func, die_match_callback_t match)
+{
+ Dwarf_Die current;
+ int res;
+
+ res = checkp(dwarf_child(die, ¤t));
+ while (!res) {
+ if (match(¤t)) {
+ /* <0 = error, 0 = continue, >0 = stop */
+ res = checkp(func(state, ¤t));
+ if (res)
+ return res;
+ }
+
+ res = checkp(dwarf_siblingof(¤t, ¤t));
+ }
+
+ return 0;
+}
+
+/*
+ * Exported symbol processing
+ */
+static void process_symbol(struct state *state, Dwarf_Die *die,
+ die_callback_t process_func)
+{
+ debug("%s", state->sym->name);
+ check(process_func(state, die));
+ if (dump_dies)
+ fputs("\n", stderr);
+}
+
+static int __process_subprogram(struct state *state, Dwarf_Die *die)
+{
+ process("subprogram");
+ return 0;
+}
+
+static void process_subprogram(struct state *state, Dwarf_Die *die)
+{
+ process_symbol(state, die, __process_subprogram);
+}
+
+static int __process_variable(struct state *state, Dwarf_Die *die)
+{
+ process("variable ");
+ return 0;
+}
+
+static void process_variable(struct state *state, Dwarf_Die *die)
+{
+ process_symbol(state, die, __process_variable);
+}
+
+static int process_exported_symbols(struct state *unused, Dwarf_Die *die)
+{
+ int tag = dwarf_tag(die);
+
+ switch (tag) {
+ /* Possible containers of exported symbols */
+ case DW_TAG_namespace:
+ case DW_TAG_class_type:
+ case DW_TAG_structure_type:
+ return check(process_die_container(
+ NULL, die, process_exported_symbols, match_all));
+
+ /* Possible exported symbols */
+ case DW_TAG_subprogram:
+ case DW_TAG_variable: {
+ struct state state;
+
+ if (!match_export_symbol(&state, die))
+ return 0;
+
+ if (tag == DW_TAG_subprogram)
+ process_subprogram(&state, &state.die);
+ else
+ process_variable(&state, &state.die);
+
+ return 0;
+ }
+ default:
+ return 0;
+ }
+}
+
+void process_cu(Dwarf_Die *cudie)
+{
+ check(process_die_container(NULL, cudie, process_exported_symbols,
+ match_all));
+}
diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c
new file mode 100644
index 000000000000..f84fa98fcbdb
--- /dev/null
+++ b/scripts/gendwarfksyms/gendwarfksyms.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Google LLC
+ */
+
+#include <fcntl.h>
+#include <getopt.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+#include "gendwarfksyms.h"
+
+/*
+ * Options
+ */
+
+/* Print debugging information to stderr */
+int debug;
+/* Dump DIE contents */
+int dump_dies;
+
+static void usage(void)
+{
+ fputs("Usage: gendwarfksyms [options] elf-object-file ... < symbol-list\n\n"
+ "Options:\n"
+ " -d, --debug Print debugging information\n"
+ " --dump-dies Dump DWARF DIE contents\n"
+ " -h, --help Print this message\n"
+ "\n",
+ stderr);
+}
+
+static int process_module(Dwfl_Module *mod, void **userdata, const char *name,
+ Dwarf_Addr base, void *arg)
+{
+ Dwarf_Addr dwbias;
+ Dwarf_Die cudie;
+ Dwarf_CU *cu = NULL;
+ Dwarf *dbg;
+ int res;
+
+ debug("%s", name);
+ dbg = dwfl_module_getdwarf(mod, &dwbias);
+
+ do {
+ res = dwarf_get_units(dbg, cu, &cu, NULL, NULL, &cudie, NULL);
+ if (res < 0)
+ error("dwarf_get_units failed: no debugging information?");
+ if (res == 1)
+ break; /* No more units */
+
+ process_cu(&cudie);
+ } while (cu);
+
+ return DWARF_CB_OK;
+}
+
+static const Dwfl_Callbacks callbacks = {
+ .section_address = dwfl_offline_section_address,
+ .find_debuginfo = dwfl_standard_find_debuginfo,
+};
+
+int main(int argc, char **argv)
+{
+ unsigned int n;
+ int opt;
+
+ struct option opts[] = { { "debug", 0, NULL, 'd' },
+ { "dump-dies", 0, &dump_dies, 1 },
+ { "help", 0, NULL, 'h' },
+ { 0, 0, NULL, 0 } };
+
+ while ((opt = getopt_long(argc, argv, "dh", opts, NULL)) != EOF) {
+ switch (opt) {
+ case 0:
+ break;
+ case 'd':
+ debug = 1;
+ break;
+ case 'h':
+ usage();
+ return 0;
+ default:
+ usage();
+ return 1;
+ }
+ }
+
+ if (optind >= argc) {
+ usage();
+ error("no input files?");
+ }
+
+ symbol_read_exports(stdin);
+
+ for (n = optind; n < argc; n++) {
+ Dwfl *dwfl;
+ int fd;
+
+ fd = open(argv[n], O_RDONLY);
+ if (fd == -1)
+ error("open failed for '%s': %s", argv[n],
+ strerror(errno));
+
+ dwfl = dwfl_begin(&callbacks);
+ if (!dwfl)
+ error("dwfl_begin failed for '%s': %s", argv[n],
+ dwarf_errmsg(-1));
+
+ if (!dwfl_report_offline(dwfl, argv[n], argv[n], fd))
+ error("dwfl_report_offline failed for '%s': %s",
+ argv[n], dwarf_errmsg(-1));
+
+ dwfl_report_end(dwfl, NULL, NULL);
+
+ if (dwfl_getmodules(dwfl, &process_module, NULL, 0))
+ error("dwfl_getmodules failed for '%s'", argv[n]);
+
+ dwfl_end(dwfl);
+ }
+
+ symbol_free();
+
+ return 0;
+}
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
new file mode 100644
index 000000000000..23e484af5d22
--- /dev/null
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2024 Google LLC
+ */
+
+#define _GNU_SOURCE
I'm getting these warnings:
scripts/gendwarfksyms/kabi.c:245:6: warning: implicit declaration of
function 'asprintf' is invalid in C99 [-Wimplicit-function-declaration]
if (asprintf(&target, "%s %s", fqn, field) < 0)
^
1 warning generated.
HOSTCC scripts/gendwarfksyms/symbols.o
HOSTCC scripts/gendwarfksyms/types.o
scripts/gendwarfksyms/types.c:260:6: warning: implicit declaration of
function 'asprintf' is invalid in C99 [-Wimplicit-function-declaration]
if (asprintf(&name, "%c#%s%s%s", prefix, quote, cache->fqn,
quote) < 0)
^
1 warning generated.
I think it may be cleaner to define _GNU_SOURCE in the CFLAGS instead.
+
+#include <dwarf.h>
+#include <elfutils/libdw.h>
+#include <elfutils/libdwfl.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <hash.h>
+#include <hashtable.h>
+#include <list.h>
+#include <xalloc.h>
+
+#ifndef __GENDWARFKSYMS_H
+#define __GENDWARFKSYMS_H
+
+/*
+ * Options -- in gendwarfksyms.c
+ */
+extern int debug;
+extern int dump_dies;
+
+/*
+ * Output helpers
+ */
+#define __PREFIX "gendwarfksyms: "
+#define __println(prefix, format, ...) \
+ fprintf(stderr, prefix __PREFIX "%s: " format "\n", __func__, \
+ ##__VA_ARGS__)
+
+#define debug(format, ...) \
+ do { \
+ if (debug) \
+ __println("", format, ##__VA_ARGS__); \
+ } while (0)
+
+#define warn(format, ...) __println("warning: ", format, ##__VA_ARGS__)
+#define error(format, ...) \
+ do { \
+ __println("error: ", format, ##__VA_ARGS__); \
+ exit(1); \
+ } while (0)
+
+/*
+ * Error handling helpers
+ */
+#define __check(expr, test) \
+ ({ \
+ int __res = expr; \
+ if (test) \
+ error("`%s` failed: %d", #expr, __res); \
+ __res; \
+ })
+
+/* Error == non-zero values */
+#define check(expr) __check(expr, __res)
+/* Error == negative values */
+#define checkp(expr) __check(expr, __res < 0)
+
+/*
+ * symbols.c
+ */
+
+struct symbol {
+ const char *name;
+ struct hlist_node name_hash;
+};
+
+typedef void (*symbol_callback_t)(struct symbol *, void *arg);
+
+void symbol_read_exports(FILE *file);
+struct symbol *symbol_get(const char *name);
+void symbol_free(void);
+
+/*
+ * dwarf.c
+ */
+
+struct state {
+ struct symbol *sym;
+ Dwarf_Die die;
+};
+
+typedef int (*die_callback_t)(struct state *state, Dwarf_Die *die);
+typedef bool (*die_match_callback_t)(Dwarf_Die *die);
+bool match_all(Dwarf_Die *die);
+
+int process_die_container(struct state *state, Dwarf_Die *die,
+ die_callback_t func, die_match_callback_t match);
+
+void process_cu(Dwarf_Die *cudie);
+
+#endif /* __GENDWARFKSYMS_H */
diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c
new file mode 100644
index 000000000000..2c901670224b
--- /dev/null
+++ b/scripts/gendwarfksyms/symbols.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Google LLC
+ */
+
+#include "gendwarfksyms.h"
+
+#define SYMBOL_HASH_BITS 15
+static HASHTABLE_DEFINE(symbol_names, 1 << SYMBOL_HASH_BITS);
+
+static unsigned int for_each(const char *name, symbol_callback_t func,
+ void *data)
+{
+ struct hlist_node *tmp;
+ struct symbol *match;
+
+ if (!name || !*name)
+ return 0;
+
+ hash_for_each_possible_safe(symbol_names, match, tmp, name_hash,
+ hash_str(name)) {
+ if (strcmp(match->name, name))
+ continue;
+
+ if (func)
+ func(match, data);
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static bool is_exported(const char *name)
+{
+ return for_each(name, NULL, NULL) > 0;
+}
+
+void symbol_read_exports(FILE *file)
+{
+ struct symbol *sym;
+ char *line = NULL;
+ char *name = NULL;
+ size_t size = 0;
+ int nsym = 0;
+
+ while (getline(&line, &size, file) > 0) {
+ if (sscanf(line, "%ms\n", &name) != 1)
+ error("malformed input line: %s", line);
+
+ if (is_exported(name)) {
+ /* Ignore duplicates */
+ free(name);
+ continue;
+ }
+
+ sym = xcalloc(1, sizeof(struct symbol));
+ sym->name = name;
+
+ hash_add(symbol_names, &sym->name_hash, hash_str(sym->name));
+ ++nsym;
+
+ debug("%s", sym->name);
+ }
+
+ free(line);
+ debug("%d exported symbols", nsym);
+}
+
+static void get_symbol(struct symbol *sym, void *arg)
+{
+ struct symbol **res = arg;
+
+ *res = sym;
+}
+
+struct symbol *symbol_get(const char *name)
+{
+ struct symbol *sym = NULL;
+
+ for_each(name, get_symbol, &sym);
+ return sym;
+}
+
+void symbol_free(void)
+{
+ struct hlist_node *tmp;
+ struct symbol *sym;
+
+ hash_for_each_safe(symbol_names, sym, tmp, name_hash) {
+ free((void *)sym->name);
+ free(sym);
+ }
+
+ hash_init(symbol_names);
+}