Em Tue, Oct 20, 2020 at 03:14:59PM -0300, Arnaldo Carvalho de Melo escreveu: > Em Tue, Oct 20, 2020 at 10:10:19AM -0700, Andrii Nakryiko escreveu: > > On Tue, Oct 20, 2020 at 10:05 AM Hao Luo <haoluo@xxxxxxxxxx> wrote: > > > Thanks for reporting this and cc'ing me. I forgot to update the error > > > messages when renaming the flags. I will send a patch to fix the error > > > message. > > > > The commit > > > > commit f3d9054ba8ff1df0fc44e507e3a01c0964cabd42 > > > Author: Hao Luo <haoluo@xxxxxxxxxx> > > > AuthorDate: Wed Jul 8 13:44:10 2020 -0700 > > > > btf_encoder: Teach pahole to store percpu variables in vmlinux BTF. > > > > encodes kernel global variables into BTF so that bpf programs can > > > directly access them. If there is no need to access kernel global > > > variables, it's perfectly fine to use '--btf_encode_force' to skip > > > encoding bad symbols into BTF, or '--skip_encoding_btf_vars' to skip > > > encoding all global vars all together. I will add these info into the > > > updated error message. > > > > Also cc bpf folks for attention of this bug. > > > I've already fixed the message as part of > > 2e719cca6672 ("btf_encoder: revamp how per-CPU variables are encoded") > > > It's currently still in the tmp.libbtf_encoder branch in pahole repo. > > I'm now running: > > $ grep BTF=y ../build/s390x-v5.9.0+/.config > CONFIG_DEBUG_INFO_BTF=y > $ make -j24 CROSS_COMPILE=s390x-linux-gnu- ARCH=s390 O=../build/s390x-v5.9.0+/ $ ls -la /home/acme/git/build/s390x-v5.9.0+/.tmp_vmlinux.btf -rwxrwxr-x. 1 acme acme 304592928 Oct 20 15:26 /home/acme/git/build/s390x-v5.9.0+/.tmp_vmlinux.btf $ file /home/acme/git/build/s390x-v5.9.0+/.tmp_vmlinux.btf /home/acme/git/build/s390x-v5.9.0+/.tmp_vmlinux.btf: ELF 64-bit MSB executable, IBM S/390, version 1 (SYSV), statically linked, BuildID[sha1]=ed39402fdbd7108c1055baaa61cfc6b0e431901d, with debug_info, not stripped $ pahole -F btf -C list_head /home/acme/git/build/s390x-v5.9.0+/.tmp_vmlinux.btf struct list_head { struct list_head * next; /* 0 8 */ struct list_head * prev; /* 8 8 */ /* size: 16, cachelines: 1, members: 2 */ /* last cacheline: 16 bytes */ }; $ $ readelf -wi /home/acme/git/build/s390x-v5.9.0+/vmlinux | grep -m2 DW_AT_producer <28> DW_AT_producer : (indirect string, offset: 0x51): GNU AS 2.34 <3b> DW_AT_producer : (indirect string, offset: 0xeb46): GNU C89 9.2.1 20190827 (Red Hat Cross 9.2.1-3) -m64 -mwarn-dynamicstack -mbackchain -msoft-float -march=z196 -mtune=z196 -mpacked-stack -mindirect-branch=thunk -mfunction-return=thunk -mindirect-branch-table -mrecord-mcount -mnop-mcount -mfentry -mzarch -g -O2 -std=gnu90 -p -fno-strict-aliasing -fno-common -fshort-wchar -fPIE -fno-asynchronous-unwind-tables -fno-delete-null-pointer-checks -fno-reorder-blocks -fno-ipa-cp-clone -fno-partial-inlining -fno-stack-protector -fno-var-tracking-assignments -fno-inline-functions-called-once -falign-functions=32 -fno-strict-overflow -fstack-check=no -fconserve-stack -fno-function-sections -fno-data-sections -fsanitize=kernel-address -fasan-shadow-offset=0x18000000000000 -fsanitize=bounds -fsanitize=shift -fsanitize=integer-divide-by-zero -fsanitize=unreachable -fsanitize=signed-integer-overflow -fsanitize=object-size -fsanitize=bool -fsanitize=enum -fsanitize-undefined-trap-on-error -fsanitize-coverage=trace-pc -fsanitize-coverage=trace-cmp --param allow-store-data-races=0 --param asan-globals=1 --param asan-instrumentation-with-call-threshold=0 --param asan-stack=1 --param asan-instrument-allocas=1 $ $ file /home/acme/git/build/s390x-v5.9.0+/vmlinux /home/acme/git/build/s390x-v5.9.0+/vmlinux: ELF 64-bit MSB executable, IBM S/390, version 1 (SYSV), statically linked, BuildID[sha1]=fbb252d8dccc11d8e66d6f248d06bcdca4e7db7a, with debug_info, not stripped $ But I noticed that 'btfdiff' is showing differences from output generated from DWARF and the one generated from BTF, the first issue is: [acme@five pahole]$ btfdiff /home/acme/git/build/v5.9.0+/vmlinux <SNIP> @@ -115549,7 +120436,7 @@ struct irq_router_handler { /* XXX 6 bytes hole, try to pack */ - int (*probe)(struct irq_router * , struct pci_dev * , u16 ); /* 8 8 */ + int (*probe)(struct irq_router *, struct pci_dev *, u16); /* 8 8 */ /* size: 16, cachelines: 1, members: 2 */ /* sum members: 10, holes: 1, sum holes: 6 */ [acme@five pahole]$ The BTF output (the one starting with '+' in the diff output) is better, just different than it was before, I'll fix the DWARF one to avoid that needless space for arg lists without names. The other problem I noticed is a bit more worrying: @@ -52,13 +52,29 @@ struct file_system_type { /* last cacheline: 8 bytes */ }; struct qspinlock { - union ; /* 0 4 */ + union { + atomic_t val; /* 0 4 */ + struct { + u8 locked; /* 0 1 */ + u8 pending; /* 1 1 */ + }; /* 0 2 */ + struct { + u16 locked_pending; /* 0 2 */ + u16 tail; /* 2 2 */ + }; /* 0 4 */ + }; /* 0 4 */ /* size: 4, cachelines: 1, members: 1 */ /* last cacheline: 4 bytes */ }; struct qrwlock { - union ; /* 0 4 */ + union { + atomic_t cnts; /* 0 4 */ + struct { + u8 wlocked; /* 0 1 */ + u8 __lstate[3]; /* 1 3 */ + }; /* 0 4 */ + }; /* 0 4 */ arch_spinlock_t wait_lock; /* 4 4 */ /* size: 8, cachelines: 1, members: 2 */ But again, its the DWARF code that is wrong :-) So, for what is being tested here, which is BTF generation, things looks Ok: i.e. using BTF: [acme@five perf]$ pahole qspinlock struct qspinlock { union { atomic_t val; /* 0 4 */ struct { u8 locked; /* 0 1 */ u8 pending; /* 1 1 */ }; /* 0 2 */ struct { u16 locked_pending; /* 0 2 */ u16 tail; /* 2 2 */ }; /* 0 4 */ }; /* 0 4 */ /* size: 4, cachelines: 1, members: 1 */ /* last cacheline: 4 bytes */ }; [acme@five perf]$ While using DWARF: [acme@five perf]$ pahole -F dwarf -C qspinlock struct qspinlock { union ; /* 0 4 */ /* size: 4, cachelines: 1, members: 1 */ /* last cacheline: 4 bytes */ }; [acme@five perf]$ typedef struct qspinlock { union { atomic_t val; /* * By using the whole 2nd least significant byte for the * pending bit, we can allow better optimization of the lock * acquisition for the pending bit holder. */ #ifdef __LITTLE_ENDIAN struct { u8 locked; u8 pending; }; struct { u16 locked_pending; u16 tail; }; #else struct { u16 tail; u16 locked_pending; }; struct { u8 reserved[2]; u8 pending; u8 locked; }; #endif }; } arch_spinlock_t; This is just a heads up, will investigate further... - Arnaldo > To do the last test I wanted before moving it to master.