On 8/7/19 5:32 PM, Andrii Nakryiko wrote: > Make .BTF section allocated and expose its contents through sysfs. > > /sys/kernel/btf directory is created to contain all the BTFs present > inside kernel. Currently there is only kernel's main BTF, represented as > /sys/kernel/btf/kernel file. Once kernel modules' BTFs are supported, > each module will expose its BTF as /sys/kernel/btf/<module-name> file. > > Current approach relies on a few pieces coming together: > 1. pahole is used to take almost final vmlinux image (modulo .BTF and > kallsyms) and generate .BTF section by converting DWARF info into > BTF. This section is not allocated and not mapped to any segment, > though, so is not yet accessible from inside kernel at runtime. > 2. objcopy dumps .BTF contents into binary file and subsequently > converts binary file into linkable object file with automatically > generated symbols _binary__btf_kernel_bin_start and > _binary__btf_kernel_bin_end, pointing to start and end, respectively, > of BTF raw data. > 3. final vmlinux image is generated by linking this object file (and > kallsyms, if necessary). sysfs_btf.c then creates > /sys/kernel/btf/kernel file and exposes embedded BTF contents through > it. This allows, e.g., libbpf and bpftool to access BTF info at > well-known location, without resorting to searching for vmlinux image > on disk (location of which is not standardized and vmlinux image > might not be even available in some scenarios, e.g., inside qemu > during testing). > > Alternative approach using .incbin assembler directive to embed BTF > contents directly was attempted but didn't work, because sysfs_btf.o is > not re-compiled during link-vmlinux.sh stage. This is required, though, > to update embedded BTF data (initially empty data is embedded, then > pahole generates BTF info and we need to regenerate sysfs_btf.o with > updated contents, but it's too late at that point). 
> > If BTF couldn't be generated due to missing or too old pahole, > sysfs_btf.c handles that gracefully by detecting that > _binary__btf_kernel_bin_start (weak symbol) is 0 and not creating > /sys/kernel/btf at all. > > v1->v2: > - allow kallsyms stage to re-use vmlinux generated by gen_btf(); > > Cc: Masahiro Yamada <yamada.masahiro@xxxxxxxxxxxxx> > Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx> > Cc: Jiri Olsa <jolsa@xxxxxxxxxx> > Cc: Sam Ravnborg <sam@xxxxxxxxxxxx> > Signed-off-by: Andrii Nakryiko <andriin@xxxxxx> > --- > kernel/bpf/Makefile | 3 +++ > kernel/bpf/sysfs_btf.c | 52 ++++++++++++++++++++++++++++++++++++++ > scripts/link-vmlinux.sh | 55 +++++++++++++++++++++++++++-------------- > 3 files changed, 91 insertions(+), 19 deletions(-) > create mode 100644 kernel/bpf/sysfs_btf.c > > diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile > index 29d781061cd5..e1d9adb212f9 100644 > --- a/kernel/bpf/Makefile > +++ b/kernel/bpf/Makefile > @@ -22,3 +22,6 @@ obj-$(CONFIG_CGROUP_BPF) += cgroup.o > ifeq ($(CONFIG_INET),y) > obj-$(CONFIG_BPF_SYSCALL) += reuseport_array.o > endif > +ifeq ($(CONFIG_SYSFS),y) > +obj-$(CONFIG_DEBUG_INFO_BTF) += sysfs_btf.o > +endif > diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c > new file mode 100644 > index 000000000000..ac06ce1d62e8 > --- /dev/null > +++ b/kernel/bpf/sysfs_btf.c > @@ -0,0 +1,52 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Provide kernel BTF information for introspection and use by eBPF tools. 
> + */ > +#include <linux/kernel.h> > +#include <linux/module.h> > +#include <linux/kobject.h> > +#include <linux/init.h> > + > +/* See scripts/link-vmlinux.sh, gen_btf() func for details */ > +extern char __weak _binary__btf_kernel_bin_start[]; > +extern char __weak _binary__btf_kernel_bin_end[]; > + > +static ssize_t > +btf_kernel_read(struct file *file, struct kobject *kobj, > + struct bin_attribute *bin_attr, > + char *buf, loff_t off, size_t len) > +{ > + memcpy(buf, _binary__btf_kernel_bin_start + off, len); > + return len; > +} > + > +static struct bin_attribute btf_kernel_attr __ro_after_init = { > + .attr = { > + .name = "kernel", > + .mode = 0444, > + }, > + .read = btf_kernel_read, > +}; > + > +static struct bin_attribute *btf_attrs[] __ro_after_init = { > + &btf_kernel_attr, > + NULL, > +}; > + > +static struct attribute_group btf_group_attr __ro_after_init = { > + .name = "btf", > + .bin_attrs = btf_attrs, > +}; > + > +static int __init btf_kernel_init(void) > +{ > + if (!_binary__btf_kernel_bin_start) > + return 0; > + > + btf_kernel_attr.size = _binary__btf_kernel_bin_end - > + _binary__btf_kernel_bin_start; > + > + return sysfs_create_group(kernel_kobj, &btf_group_attr); > +} > + > +subsys_initcall(btf_kernel_init); > diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh > index a7124f895b24..e05abe19b11f 100755 > --- a/scripts/link-vmlinux.sh > +++ b/scripts/link-vmlinux.sh > @@ -56,8 +56,8 @@ modpost_link() > } > > # Link of vmlinux > -# ${1} - optional extra .o files > -# ${2} - output file > +# ${1} - output file > +# ${@:2} - optional extra .o files > vmlinux_link() > { > local lds="${objtree}/${KBUILD_LDS}" > @@ -70,9 +70,9 @@ vmlinux_link() > --start-group \ > ${KBUILD_VMLINUX_LIBS} \ > --end-group \ > - ${1}" > + ${@:2}" > > - ${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} -o ${2} \ > + ${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} -o ${1} \ > -T ${lds} ${objects} > else > objects="-Wl,--whole-archive \ > @@ -81,9 +81,9 @@ vmlinux_link() > 
-Wl,--start-group \ > ${KBUILD_VMLINUX_LIBS} \ > -Wl,--end-group \ > - ${1}" > + ${@:2}" > > - ${CC} ${CFLAGS_vmlinux} -o ${2} \ > + ${CC} ${CFLAGS_vmlinux} -o ${1} \ > -Wl,-T,${lds} \ > ${objects} \ > -lutil -lrt -lpthread > @@ -92,23 +92,34 @@ vmlinux_link() > } > > # generate .BTF typeinfo from DWARF debuginfo > +# ${1} - vmlinux image > +# ${2} - file to dump raw BTF data into > gen_btf() > { > - local pahole_ver; > + local pahole_ver > + local bin_arch > > if ! [ -x "$(command -v ${PAHOLE})" ]; then > info "BTF" "${1}: pahole (${PAHOLE}) is not available" > - return 0 > + return 1 > fi > > pahole_ver=$(${PAHOLE} --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/') > if [ "${pahole_ver}" -lt "113" ]; then > info "BTF" "${1}: pahole version $(${PAHOLE} --version) is too old, need at least v1.13" > - return 0 > + return 1 > fi > > - info "BTF" ${1} > + info "BTF" ${2} > + vmlinux_link ${1} > LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${1} > + > + # dump .BTF section into raw binary file to link with final vmlinux > + bin_arch=$(${OBJDUMP} -f ${1} | grep architecture | \ > + cut -d, -f1 | cut -d' ' -f2) > + ${OBJCOPY} --dump-section .BTF=.btf.kernel.bin ${1} 2>/dev/null > + ${OBJCOPY} -I binary -O ${CONFIG_OUTPUT_FORMAT} -B ${bin_arch} \ > + --rename-section .data=.BTF .btf.kernel.bin ${2} Currently, the binary size on my config is about 2.6MB. Do you think we could, or need to, compress it to make it smaller? I tried gzip and the compressed size is 0.9MB. 
> } > > # Create ${2} .o file with all symbols from the ${1} object file > @@ -153,6 +164,7 @@ sortextable() > # Delete output files in case of error > cleanup() > { > + rm -f .btf.* > rm -f .tmp_System.map > rm -f .tmp_kallsyms* > rm -f .tmp_vmlinux* > @@ -215,6 +227,13 @@ ${MAKE} -f "${srctree}/scripts/Makefile.modpost" vmlinux.o > info MODINFO modules.builtin.modinfo > ${OBJCOPY} -j .modinfo -O binary vmlinux.o modules.builtin.modinfo > > +btf_kernel_bin_o="" > +if [ -n "${CONFIG_DEBUG_INFO_BTF}" ]; then > + if gen_btf .tmp_vmlinux1 .btf.kernel.bin.o ; then > + btf_kernel_bin_o=.btf.kernel.bin.o > + fi > +fi > + > kallsymso="" > kallsyms_vmlinux="" > if [ -n "${CONFIG_KALLSYMS}" ]; then > @@ -246,11 +265,14 @@ if [ -n "${CONFIG_KALLSYMS}" ]; then > kallsyms_vmlinux=.tmp_vmlinux2 > > # step 1 > - vmlinux_link "" .tmp_vmlinux1 > + # skip building .tmp_vmlinux1 if gen_btf() already did that > + if [ -z "${btf_kernel_bin_o}" ]; then > + vmlinux_link .tmp_vmlinux1 > + fi > kallsyms .tmp_vmlinux1 .tmp_kallsyms1.o > > # step 2 > - vmlinux_link .tmp_kallsyms1.o .tmp_vmlinux2 > + vmlinux_link .tmp_vmlinux2 .tmp_kallsyms1.o ${btf_kernel_bin_o} > kallsyms .tmp_vmlinux2 .tmp_kallsyms2.o > > # step 3 > @@ -261,18 +283,13 @@ if [ -n "${CONFIG_KALLSYMS}" ]; then > kallsymso=.tmp_kallsyms3.o > kallsyms_vmlinux=.tmp_vmlinux3 > > - vmlinux_link .tmp_kallsyms2.o .tmp_vmlinux3 > - > + vmlinux_link .tmp_vmlinux3 .tmp_kallsyms2.o ${btf_kernel_bin_o} > kallsyms .tmp_vmlinux3 .tmp_kallsyms3.o > fi > fi > > info LD vmlinux > -vmlinux_link "${kallsymso}" vmlinux > - > -if [ -n "${CONFIG_DEBUG_INFO_BTF}" ]; then > - gen_btf vmlinux > -fi > +vmlinux_link vmlinux "${kallsymso}" "${btf_kernel_bin_o}" > > if [ -n "${CONFIG_BUILDTIME_EXTABLE_SORT}" ]; then > info SORTEX vmlinux >