Em Thu, Jan 10, 2019 at 02:50:32PM -0300, Arnaldo Carvalho de Melo escreveu: > So, the size for an enumeration in BTF is in bytes, not in bits, as in > CTF, fixed with the patch below, looking at the other cases now. We're down to packed data structs, probably in the BTF encoder, see below. $ diff -u /tmp/tcp.o.dwarf.c /tmp/tcp.o.btf.c --- /tmp/tcp.o.dwarf.c 2019-01-10 12:10:26.890521322 -0300 +++ /tmp/tcp.o.btf.c 2019-01-10 14:46:04.584743337 -0300 @@ -585,11 +585,17 @@ __u16 vesapm_off; /* 48 2 */ __u16 pages; /* 50 2 */ __u16 vesa_attributes; /* 52 2 */ - __u32 capabilities; /* 54 4 */ - __u32 ext_lfb_base; /* 58 4 */ + __u32 capabilities; /* 52 4 */ + __u32 ext_lfb_base; /* 56 4 */ + + /* XXX 2 bytes hole, try to pack */ + __u8 _reserved[2]; /* 62 2 */ Were these 4 extra bytes came from? The original struct is: /* * These are set up by the setup-routine at boot-time: */ struct screen_info { __u8 orig_x; /* 0x00 */ __u8 orig_y; /* 0x01 */ __u16 ext_mem_k; /* 0x02 */ __u16 orig_video_page; /* 0x04 */ __u8 orig_video_mode; /* 0x06 */ __u8 orig_video_cols; /* 0x07 */ __u8 flags; /* 0x08 */ __u8 unused2; /* 0x09 */ __u16 orig_video_ega_bx;/* 0x0a */ __u16 unused3; /* 0x0c */ __u8 orig_video_lines; /* 0x0e */ __u8 orig_video_isVGA; /* 0x0f */ __u16 orig_video_points;/* 0x10 */ /* VESA graphic mode -- linear frame buffer */ __u16 lfb_width; /* 0x12 */ __u16 lfb_height; /* 0x14 */ __u16 lfb_depth; /* 0x16 */ __u32 lfb_base; /* 0x18 */ __u32 lfb_size; /* 0x1c */ __u16 cl_magic, cl_offset; /* 0x20 */ __u16 lfb_linelength; /* 0x24 */ __u8 red_size; /* 0x26 */ __u8 red_pos; /* 0x27 */ __u8 green_size; /* 0x28 */ __u8 green_pos; /* 0x29 */ __u8 blue_size; /* 0x2a */ __u8 blue_pos; /* 0x2b */ __u8 rsvd_size; /* 0x2c */ __u8 rsvd_pos; /* 0x2d */ __u16 vesapm_seg; /* 0x2e */ __u16 vesapm_off; /* 0x30 */ __u16 pages; /* 0x32 */ __u16 vesa_attributes; /* 0x34 */ __u32 capabilities; /* 0x36 */ __u32 ext_lfb_base; /* 0x3a */ __u8 _reserved[2]; /* 0x3e */ } __attribute__((packed)); $ pahole --hex -F dwarf -C screen_info ~/git/build/v4.20-rc5/net/ipv4/tcp.o struct screen_info { __u8 orig_x; /* 0 0x1 */ __u8 orig_y; /* 0x1 0x1 */ __u16 ext_mem_k; /* 0x2 0x2 */ __u16 orig_video_page; /* 0x4 0x2 */ __u8 orig_video_mode; /* 0x6 0x1 */ __u8 orig_video_cols; /* 0x7 0x1 */ __u8 flags; /* 0x8 0x1 */ __u8 unused2; /* 0x9 0x1 */ __u16 orig_video_ega_bx; /* 0xa 0x2 */ __u16 unused3; /* 0xc 0x2 */ __u8 orig_video_lines; /* 0xe 0x1 */ __u8 orig_video_isVGA; /* 0xf 0x1 */ __u16 orig_video_points; /* 0x10 0x2 */ __u16 lfb_width; /* 0x12 0x2 */ __u16 lfb_height; /* 0x14 0x2 */ __u16 lfb_depth; /* 0x16 0x2 */ __u32 lfb_base; /* 0x18 0x4 */ __u32 lfb_size; /* 0x1c 0x4 */ __u16 cl_magic; /* 0x20 0x2 */ __u16 cl_offset; /* 0x22 0x2 */ __u16 lfb_linelength; /* 0x24 0x2 */ __u8 red_size; /* 0x26 0x1 */ __u8 red_pos; /* 0x27 0x1 */ __u8 green_size; /* 0x28 0x1 */ __u8 green_pos; /* 0x29 0x1 */ __u8 blue_size; /* 0x2a 0x1 */ __u8 blue_pos; /* 0x2b 0x1 */ __u8 rsvd_size; /* 0x2c 0x1 */ __u8 rsvd_pos; /* 0x2d 0x1 */ __u16 vesapm_seg; /* 0x2e 0x2 */ __u16 vesapm_off; /* 0x30 0x2 */ __u16 pages; /* 0x32 0x2 */ __u16 vesa_attributes; /* 0x34 0x2 */ __u32 capabilities; /* 0x36 0x4 */ __u32 ext_lfb_base; /* 0x3a 0x4 */ __u8 _reserved[2]; /* 0x3e 0x2 */ /* size: 64, cachelines: 1, members: 36 */ }; the dwarf loader/pahole printer seem to be doing the right thing, the BTF encoder, it seems, has issues with attribute((packed)) structs, see below. [acme@quaco pahole]$ pahole --hex -F dwarf -C screen_info ~/git/build/v4.20-rc5/net/ipv4/tcp.o > /tmp/dwarf [acme@quaco pahole]$ pahole --hex -F btf -C screen_info ~/git/build/v4.20-rc5/net/ipv4/tcp.o > /tmp/btf [acme@quaco pahole]$ diff -u /tmp/dwarf /tmp/btf --- /tmp/dwarf 2019-01-10 15:00:14.946723885 -0300 +++ /tmp/btf 2019-01-10 15:00:18.669757798 -0300 @@ -32,9 +32,15 @@ __u16 vesapm_off; /* 0x30 0x2 */ __u16 pages; /* 0x32 0x2 */ __u16 vesa_attributes; /* 0x34 0x2 */ - __u32 capabilities; /* 0x36 0x4 */ + __u32 capabilities; /* 0x34 0x4 */ - __u32 ext_lfb_base; /* 0x3a 0x4 */ + __u32 ext_lfb_base; /* 0x38 0x4 */ + + /* XXX 2 bytes hole, try to pack */ + __u8 _reserved[2]; /* 0x3e 0x2 */ /* size: 64, cachelines: 1, members: 36 */ + /* sum members: 60, holes: 1, sum holes: 2 */ + + /* BRAIN FART ALERT! 64 != 60 + 2(holes), diff = 2 */ }; [acme@quaco pahole]$ Why the BTF encoder or loader is getting this in the wrong place... Debugging... /* size: 64, cachelines: 1, members: 36 */ + /* sum members: 60, holes: 1, sum holes: 2 */ + + /* BRAIN FART ALERT! 64 != 60 + 2(holes), diff = 2 */ }; struct apm_bios_info { __u16 version; /* 0 2 */ @@ -630,7 +636,10 @@ __u32 sectors_per_track; /* 12 4 */ __u64 number_of_sectors; /* 16 8 */ __u16 bytes_per_sector; /* 24 2 */ - __u32 dpte_ptr; /* 26 4 */ + __u32 dpte_ptr; /* 24 4 */ Looks like the same problem as with screen_info, 'dpte_ptr' got "unionized" with 'bytes_per_sector' + + /* XXX 2 bytes hole, try to pack */ + __u16 key; /* 30 2 */ __u8 device_path_info_length; /* 32 1 */ __u8 reserved2; /* 33 1 */ @@ -683,7 +692,10 @@ } atapi; /* 56 16 */ struct { __u16 id; /* 56 2 */ - __u64 lun; /* 58 8 */ + __u64 lun; /* 56 8 */ Ditto, here it could be trying to put 'lun' in its natural alignment, probably 'struct edd_device_params' is packed like screen_info? Yes... include/uapi/linux/edd.h line 72 + + /* XXX 2 bytes hole, try to pack */ + /* --- cacheline 1 boundary (64 bytes) was 2 bytes ago --- */ __u16 reserved1; /* 66 2 */ __u32 reserved2; /* 68 4 */ @@ -733,7 +745,10 @@ __u8 checksum; /* 73 1 */ /* size: 74, cachelines: 2, members: 18 */ + /* sum members: 70, holes: 1, sum holes: 2 */ /* last cacheline: 10 bytes */ + + /* BRAIN FART ALERT! 74 != 70 + 2(holes), diff = 2 */ }; struct edd_info { __u8 device; /* 0 1 */ @@ -849,10 +864,13 @@ }; struct desc_ptr { short unsigned int size; /* 0 2 */ - long unsigned int address; /* 2 8 */ + long unsigned int address; /* 0 8 */ One more packed struct: struct desc_ptr { unsigned short size; unsigned long address; } __attribute__((packed)) ; /* size: 10, cachelines: 1, members: 2 */ + /* padding: 2 */ /* last cacheline: 10 bytes */ + + /* BRAIN FART ALERT! 10 != 2 + 0(holes), diff = 8 */ }; struct pgprot { pgprotval_t pgprot; /* 0 8 */ @@ -1467,10 +1485,13 @@ }; struct x86_hw_tss { Also a packed struct u32 reserved1; /* 0 4 */ - u64 sp0; /* 4 8 */ - u64 sp1; /* 12 8 */ - u64 sp2; /* 20 8 */ - u64 reserved2; /* 28 8 */ + u64 sp0; /* 0 8 */ + u64 sp1; /* 8 8 */ + u64 sp2; /* 16 8 */ + u64 reserved2; /* 24 8 */ + + /* XXX 4 bytes hole, try to pack */ + u64 ist[7]; /* 36 56 */ /* --- cacheline 1 boundary (64 bytes) was 28 bytes ago --- */ u32 reserved3; /* 92 4 */ @@ -1479,7 +1500,10 @@ u16 io_bitmap_base; /* 102 2 */ /* size: 104, cachelines: 2, members: 10 */ + /* sum members: 96, holes: 1, sum holes: 4 */ /* last cacheline: 40 bytes */ + + /* BRAIN FART ALERT! 104 != 96 + 4(holes), diff = 4 */ }; struct seq_operations { void * (*start)(struct seq_file *, loff_t *); /* 0 8 */ This last one is interesting, was this done on purpose? I think one level of indirection more and we would keep the original expressiveness. @@ -8473,7 +8497,7 @@ struct lruvec_stat * lruvec_stat_cpu; /* 136 8 */ atomic_long_t lruvec_stat[30]; /* 144 240 */ /* --- cacheline 6 boundary (384 bytes) --- */ - long unsigned int lru_zone_size[5][5]; /* 384 200 */ + long unsigned int lru_zone_size[25]; /* 384 200 */ /* --- cacheline 9 boundary (576 bytes) was 8 bytes ago --- */ struct mem_cgroup_reclaim_iter iter[13]; /* 584 208 */ /* --- cacheline 12 boundary (768 bytes) was 24 bytes ago --- */ - Arnaldo