On 12/20/18 10:31 AM, Arnaldo Carvalho de Melo wrote: > Adding dwarves@xxxxxxxxxxxxxxx to the CC list. > > Em Tue, Dec 18, 2018 at 02:09:38PM -0800, Yonghong Song escreveu: >> This patch set included three btf related changes. > >> Patch #1 added support to do dwarf->btf conversion >> for multiple cu's and multiple binaries. >> Patch #2 added support to generate types for struct, >> union and fwd using kind_flag. The kernel support >> for kind_flag is here: >> https://urldefense.proofpoint.com/v2/url?u=https-3A__www.spinics.net_lists_netdev_msg539088.html&d=DwIBAg&c=5VD0RTtNlTh3ycd41b3MUw&r=DA8e1B5r073vIqRrFz7MRA&m=yUci7csgF5JQxbKJ-Qbe7_WSnzpZGNHBeR9v6sbfOX8&s=x1n4q5bAFOwgK8gMbeRHbtQ-PEXqurdDjCkhEjY-d6s&e= >> Patch #3 added func_proto support. The kernel support >> is here: >> https://urldefense.proofpoint.com/v2/url?u=https-3A__www.spinics.net_lists_netdev_msg534643.html&d=DwIBAg&c=5VD0RTtNlTh3ycd41b3MUw&r=DA8e1B5r073vIqRrFz7MRA&m=yUci7csgF5JQxbKJ-Qbe7_WSnzpZGNHBeR9v6sbfOX8&s=BLITwXmBAnM1RkuzWM9DFwGNw1YPlZArh7qR9OY-Azc&e= > >> Please see individual patch for details. > >> Yonghong Song (3): >> btf: permit multiple cu's for dwarf->btf conversion >> btf: fix struct/union/fwd types with kind_flag >> btf: add func_proto support > > Ok, I tested and processed those and added a few more, adding a btf > loader, pushed out to my git.kernel.org pahole repo. > > The btf loader is at: > > https://git.kernel.org/pub/scm/devel/pahole/pahole.git/commit/?id=472256d3c57bef53250686d55bd011f5dab4b657 > > Please take a look to see if I made some mistake. I looked at this issue. The reason is you did not take into the consideration of recent kind_flag. Basically, bitfield_size can be determined without looking at member types. if (kind_flag) bitfield_size = BTF_MEMBER_BITFIELD_SIZE(offset) else bitfield_size = 0 I will send a separate patch to address this issue. Feel free to reuse any part of it if you need more changes. > > For a net/ipv4/tcp.o from a kernel build, for struct sk_buff, shows > differences wrt the bit size of bitfield members, that is not encoded in > BTF at the moment: > > $ pahole -F btf -C sk_buff ~/git/build/v4.20-rc5+/net/ipv4/tcp.o > /tmp/sk_buff.btf > $ pahole -F dwarf -C sk_buff ~/git/build/v4.20-rc5+/net/ipv4/tcp.o > /tmp/sk_buff.dwarf > $ diff -u /tmp/sk_buff.dwarf /tmp/sk_buff.btf > --- /tmp/sk_buff.dwarf 2018-12-20 14:50:51.428653046 -0300 > +++ /tmp/sk_buff.btf 2018-12-20 14:50:46.302601516 -0300 > @@ -38,45 +38,45 @@ > __u16 hdr_len; /* 138 2 */ > __u16 queue_mapping; /* 140 2 */ > __u8 __cloned_offset[0]; /* 142 0 */ > - __u8 cloned:1; /* 142: 7 1 */ > - __u8 nohdr:1; /* 142: 6 1 */ > - __u8 fclone:2; /* 142: 4 1 */ > - __u8 peeked:1; /* 142: 3 1 */ > - __u8 head_frag:1; /* 142: 2 1 */ > - __u8 xmit_more:1; /* 142: 1 1 */ > - __u8 pfmemalloc:1; /* 142: 0 1 */ > + __u8 cloned; /* 142 1 */ > + __u8 nohdr; /* 142 1 */ > + __u8 fclone; /* 142 1 */ > + __u8 peeked; /* 142 1 */ > + __u8 head_frag; /* 142 1 */ > + __u8 xmit_more; /* 142 1 */ > + __u8 pfmemalloc; /* 142 1 */ With fixed btf loader, I got /* --- cacheline 2 boundary (128 bytes) --- */ __u16 mac_len; /* 128 2 */ __u16 hdr_len; /* 130 2 */ __u16 queue_mapping; /* 132 2 */ __u8 __cloned_offset[0]; /* 134 0 */ __u8 cloned:1; /* 134: 0 1 */ __u8 nohdr:1; /* 134: 1 1 */ __u8 fclone:2; /* 134: 2 1 */ __u8 peeked:1; /* 134: 4 1 */ __u8 head_frag:1; /* 134: 5 1 */ __u8 xmit_more:1; /* 134: 6 1 */ __u8 pfmemalloc:1; /* 134: 7 1 */ /* XXX 1 byte hole, try to pack */ __u32 headers_start[0]; /* 136 0 */ __u8 __pkt_type_offset[0]; /* 136 0 */ __u8 pkt_type:3; /* 136: 0 1 */ __u8 ignore_df:1; /* 136: 3 1 */ __u8 nf_trace:1; /* 136: 4 1 */ __u8 ip_summed:2; /* 136: 5 1 */ __u8 ooo_okay:1; /* 136: 7 1 */ __u8 l4_hash:1; /* 137: 0 1 */ __u8 sw_hash:1; /* 137: 1 1 */ Ignore the byte offset which may due to different kernel configurations. The bitfield offset in BTF starts from lower number to bigger one. That is, it is always following the big endian convention, bitfield_offset will be smaller if close to the top of structure. This is different from what dwarf is doing, which will show different things on little endian vs. big endian. You can make simple adjustment based on all available info. In btf_encoder.s, we did similar adjustment for little endian from dwarf to btf. > > /* XXX 1 byte hole, try to pack */ > > __u32 headers_start[0]; /* 144 0 */ > __u8 __pkt_type_offset[0]; /* 144 0 */ > - __u8 pkt_type:3; /* 144: 5 1 */ > - __u8 ignore_df:1; /* 144: 4 1 */ > - __u8 nf_trace:1; /* 144: 3 1 */ > - __u8 ip_summed:2; /* 144: 1 1 */ > - __u8 ooo_okay:1; /* 144: 0 1 */ > - __u8 l4_hash:1; /* 145: 7 1 */ > - __u8 sw_hash:1; /* 145: 6 1 */ > - __u8 wifi_acked_valid:1; /* 145: 5 1 */ > - __u8 wifi_acked:1; /* 145: 4 1 */ > - __u8 no_fcs:1; /* 145: 3 1 */ > - __u8 encapsulation:1; /* 145: 2 1 */ > - __u8 encap_hdr_csum:1; /* 145: 1 1 */ > - __u8 csum_valid:1; /* 145: 0 1 */ > - __u8 csum_complete_sw:1; /* 146: 7 1 */ > - __u8 csum_level:2; /* 146: 5 1 */ > - __u8 csum_not_inet:1; /* 146: 4 1 */ > - __u8 dst_pending_confirm:1; /* 146: 3 1 */ > - __u8 ndisc_nodetype:2; /* 146: 1 1 */ > - __u8 ipvs_property:1; /* 146: 0 1 */ > - __u8 inner_protocol_type:1; /* 147: 7 1 */ > - __u8 remcsum_offload:1; /* 147: 6 1 */ > - __u8 offload_fwd_mark:1; /* 147: 5 1 */ > - __u8 offload_mr_fwd_mark:1; /* 147: 4 1 */ > - __u8 tc_skip_classify:1; /* 147: 3 1 */ > - __u8 tc_at_ingress:1; /* 147: 2 1 */ > - __u8 tc_redirected:1; /* 147: 1 1 */ > - __u8 tc_from_ingress:1; /* 147: 0 1 */ > + __u8 pkt_type; /* 144 1 */ > + __u8 ignore_df; /* 144 1 */ > + __u8 nf_trace; /* 144 1 */ > + __u8 ip_summed; /* 144 1 */ > + __u8 ooo_okay; /* 144 1 */ > + __u8 l4_hash; /* 145 1 */ > + __u8 sw_hash; /* 145 1 */ > + __u8 wifi_acked_valid; /* 145 1 */ > + __u8 wifi_acked; /* 145 1 */ > + __u8 no_fcs; /* 145 1 */ > + __u8 encapsulation; /* 145 1 */ > + __u8 encap_hdr_csum; /* 145 1 */ > + __u8 csum_valid; /* 145 1 */ > + __u8 csum_complete_sw; /* 146 1 */ > + __u8 csum_level; /* 146 1 */ > + __u8 csum_not_inet; /* 146 1 */ > + __u8 dst_pending_confirm; /* 146 1 */ > + __u8 ndisc_nodetype; /* 146 1 */ > + __u8 ipvs_property; /* 146 1 */ > + __u8 inner_protocol_type; /* 147 1 */ > + __u8 remcsum_offload; /* 147 1 */ > + __u8 offload_fwd_mark; /* 147 1 */ > + __u8 offload_mr_fwd_mark; /* 147 1 */ > + __u8 tc_skip_classify; /* 147 1 */ > + __u8 tc_at_ingress; /* 147 1 */ > + __u8 tc_redirected; /* 147 1 */ > + __u8 tc_from_ingress; /* 147 1 */ > __u16 tc_index; /* 148 2 */ > > /* XXX 2 bytes hole, try to pack */ > $ > > For the whole net/ipv6/tcp.o: > > I'm getting a segfault when processing this entry: > > [acme@quaco pahole]$ pahole -F dwarf -C fpregs_state ~/git/build/v4.20-rc5+/net/ipv4/tcp.o > union fpregs_state { > struct fxregs_state fxsave; /* 0 512 */ > struct swregs_state soft; /* 0 136 */ > struct xregs_state xsave; /* 0 576 */ > u8 __padding[4096]; /* 0 4096 */ > }; > [acme@quaco pahole]$ > > [acme@quaco pahole]$ pahole -F btf -C fpregs_state ~/git/build/v4.20-rc5+/net/ipv4/tcp.o > Segmentation fault (core dumped) > [acme@quaco pahole]$ I can easily reproduce with the following simple file. -bash-4.4$ cat t.c union t { int a; int b; } g; Looks like the call chain likes print_classes cu__for_each_struct_or_union(cu, id, pos) { .... class__filter(...) But in btf_loader.c, struct is allocated with class__new(), but union is allocated with type__new(). Maybe some issue here? > > Have to stop now, will try to fix this later, please let me know if you > found the problem meanwhile :-) > > The difference up to the point where it tries to process that union is > just related to bitfields: > > --- /tmp/tcp.o.dwarf.c 2018-12-20 15:00:09.249222711 -0300 > +++ /tmp/tcp.o.btf.c 2018-12-20 15:22:00.935246130 -0300 > @@ -81,11 +81,13 @@ > const char * function; /* 8 8 */ > const char * filename; /* 16 8 */ > const char * format; /* 24 8 */ > - unsigned int lineno:18; /* 32:14 4 */ > - unsigned int flags:8; /* 32: 6 4 */ > + unsigned int lineno; /* 32 4 */ > > - /* XXX 6 bits hole, try to pack */ > - /* XXX 4 bytes hole, try to pack */ > + /* Bitfield combined with next fields */ > + > + unsigned int flags; /* 34 4 */ > + > + /* XXX 2 bytes hole, try to pack */ > > union { > struct static_key_true dd_key_true; /* 40 16 */ > @@ -93,8 +95,7 @@ > } key; /* 40 16 */ > > /* size: 56, cachelines: 1, members: 7 */ > - /* sum members: 52, holes: 1, sum holes: 4 */ > - /* bit holes: 1, sum bit holes: 6 bits */ > + /* sum members: 54, holes: 1, sum holes: 2 */ > /* last cacheline: 56 bytes */ > }; > struct file_operations { > @@ -223,7 +224,11 @@ > } futex; /* 8 40 */ > struct { > clockid_t clockid; /* 8 4 */ > - enum timespec_type type; /* 12 4 */ > + enum timespec_type type:4; /* 12: 0 1 */ > + > + /* XXX 4 bits hole, try to pack */ > + /* XXX 3 bytes hole, try to pack */ > + > union { > struct timespec * rmtp; /* 16 8 */ > struct old_timespec32 * compat_rmtp; /* 16 8 */ > @@ -320,22 +325,16 @@ > > long unsigned int jobctl; /* 1120 8 */ > unsigned int personality; /* 1128 4 */ > - unsigned int sched_reset_on_fork:1; /* 1132:31 4 */ > - unsigned int sched_contributes_to_load:1; /* 1132:30 4 */ > - unsigned int sched_migrated:1; /* 1132:29 4 */ > - unsigned int sched_remote_wakeup:1; /* 1132:28 4 */ > - > - /* XXX 28 bits hole, try to pack */ > - > - unsigned int in_execve:1; /* 1136:31 4 */ > - unsigned int in_iowait:1; /* 1136:30 4 */ > - unsigned int restore_sigmask:1; /* 1136:29 4 */ > - unsigned int in_user_fault:1; /* 1136:28 4 */ > - unsigned int no_cgroup_migration:1; /* 1136:27 4 */ > - unsigned int use_memdelay:1; /* 1136:26 4 */ > - > - /* XXX 26 bits hole, try to pack */ > - > + unsigned int sched_reset_on_fork; /* 1132 4 */ > + unsigned int sched_contributes_to_load; /* 1132 4 */ > + unsigned int sched_migrated; /* 1132 4 */ > + unsigned int sched_remote_wakeup; /* 1132 4 */ > + unsigned int in_execve; /* 1136 4 */ > + unsigned int in_iowait; /* 1136 4 */ > + unsigned int restore_sigmask; /* 1136 4 */ > + unsigned int in_user_fault; /* 1136 4 */ > + unsigned int no_cgroup_migration; /* 1136 4 */ > + unsigned int use_memdelay; /* 1136 4 */ > unsigned int kernel_uaccess_faults_ok; /* 1140 4 */ > long unsigned int atomic_flags; /* 1144 8 */ > /* --- cacheline 18 boundary (1152 bytes) --- */ > @@ -548,7 +547,6 @@ > > /* size: 11008, cachelines: 172, members: 207 */ > /* sum members: 10906, holes: 15, sum holes: 102 */ > - /* bit holes: 2, sum bit holes: 54 bits */ > /* paddings: 3, sum paddings: 14 */ > }; > struct screen_info { > @@ -811,40 +809,64 @@ > struct desc_struct { > u16 limit0; /* 0 2 */ > u16 base0; /* 2 2 */ > - u16 base1:8; /* 4: 8 2 */ > - u16 type:4; /* 4: 4 2 */ > - u16 s:1; /* 4: 3 2 */ > - u16 dpl:2; /* 4: 1 2 */ > - u16 p:1; /* 4: 0 2 */ > - u16 limit1:4; /* 6:12 2 */ > - u16 avl:1; /* 6:11 2 */ > - u16 l:1; /* 6:10 2 */ > - u16 d:1; /* 6: 9 2 */ > - u16 g:1; /* 6: 8 2 */ > - u16 base2:8; /* 6: 0 2 */ > + u16 base1; /* 4 2 */ > + > + /* Bitfield combined with next fields */ > + > + u16 type; /* 5 2 */ > + u16 s; /* 5 2 */ > + u16 dpl; /* 5 2 */ > + u16 p; /* 5 2 */ > + > + /* Bitfield combined with next fields */ > + > + u16 limit1; /* 6 2 */ > + u16 avl; /* 6 2 */ > + u16 l; /* 6 2 */ > + u16 d; /* 6 2 */ > + u16 g; /* 6 2 */ > + > + /* Bitfield combined with next fields */ > + > + u16 base2; /* 7 2 */ > > /* size: 8, cachelines: 1, members: 13 */ > + /* padding: 65535 */ > /* last cacheline: 8 bytes */ > + > + /* BRAIN FART ALERT! 8 != 9 + 0(holes), diff = -1 */ > + > }; > struct idt_bits { > - u16 ist:3; /* 0:13 2 */ > - u16 zero:5; /* 0: 8 2 */ > - u16 type:5; /* 0: 3 2 */ > - u16 dpl:2; /* 0: 1 2 */ > - u16 p:1; /* 0: 0 2 */ > + u16 ist; /* 0 2 */ > + u16 zero; /* 0 2 */ > + > + /* Bitfield combined with next fields */ > + > + u16 type; /* 1 2 */ > + u16 dpl; /* 1 2 */ > + u16 p; /* 1 2 */ > > /* size: 2, cachelines: 1, members: 5 */ > + /* padding: 65535 */ > /* last cacheline: 2 bytes */ > + > + /* BRAIN FART ALERT! 2 != 3 + 0(holes), diff = -1 */ > + > }; > struct gate_struct { > u16 offset_low; /* 0 2 */ > u16 segment; /* 2 2 */ > struct idt_bits bits; /* 4 2 */ > + > + /* XXX last struct has 65535 bytes of padding */ > + > u16 offset_middle; /* 6 2 */ > u32 offset_high; /* 8 4 */ > u32 reserved; /* 12 4 */ > > /* size: 16, cachelines: 1, members: 6 */ > + /* paddings: 1, sum paddings: 65535 */ > /* last cacheline: 16 bytes */ > }; > struct desc_ptr { > @@ -884,9 +906,15 @@ > void * s_mem; /* 40 8 */ > long unsigned int counters; /* 40 8 */ > struct { > - unsigned int inuse:16; /* 40:16 4 */ > - unsigned int objects:15; /* 40: 1 4 */ > - unsigned int frozen:1; /* 40: 0 4 */ > + unsigned int inuse; /* 40 4 */ > + > + /* Bitfield combined with next fields */ > + > + unsigned int objects; /* 42 4 */ > + > + /* Bitfield combined with next fields */ > + > + unsigned int frozen; /* 43 4 */ > }; /* 40 4 */ > }; /* 40 8 */ > }; /* 8 40 */ > @@ -1039,10 +1067,9 @@ > /* XXX 4 bytes hole, try to pack */ > > mm_segment_t addr_limit; /* 152 8 */ > - unsigned int sig_on_uaccess_err:1; /* 160:31 4 */ > - unsigned int uaccess_err:1; /* 160:30 4 */ > + unsigned int sig_on_uaccess_err; /* 160 4 */ > + unsigned int uaccess_err; /* 160 4 */ > > - /* XXX 30 bits hole, try to pack */ > /* XXX 28 bytes hole, try to pack */ > > /* --- cacheline 3 boundary (192 bytes) --- */ > @@ -1050,7 +1077,6 @@ > > /* size: 4352, cachelines: 68, members: 21 */ > /* sum members: 4320, holes: 2, sum holes: 32 */ > - /* bit holes: 1, sum bit holes: 30 bits */ > }; > struct pv_irq_ops { > struct paravirt_callee_save save_fl; /* 0 8 */ >