On Tue, Mar 26, 2019 at 12:18 PM Arnaldo Carvalho de Melo <arnaldo.melo@xxxxxxxxx> wrote: > > Em Tue, Mar 26, 2019 at 03:52:09PM -0300, Arnaldo Carvalho de Melo escreveu: > > Em Tue, Mar 26, 2019 at 11:30:54AM -0700, andrii.nakryiko@xxxxxxxxx escreveu: > > > From: Andrii Nakryiko <andriin@xxxxxx> > > > > > > Counting field sizes only in bits causes confusion and lots of differing > > > output, when compared to previous logic. This commit changes logic so > > > that it counts bit size of bitfield fields separately from byte size of > > > non-bitfield fields. In the end, if there were bit holes, this bit size > > > is emitted explicitly. This makes output for struct/unions not using > > > bitfields identical, while also preserving correctness (and data > > > completeness) for cases with bitfields and bit holes. > > > > > > Example (-before/+after): > > > struct cfg80211_pmsr_request_peer { > > > u8 addr[6]; /* 0 6 */ > > > > > > /* XXX 2 bytes hole, try to pack */ > > > > > > struct cfg80211_chan_def chandef; /* 8 24 */ > > > > > > /* XXX last struct has 4 bytes of padding */ > > > > > > u8 report_ap_tsf:1; /* 32: 0 1 */ > > > > > > /* XXX 7 bits hole, try to pack */ > > > /* XXX 3 bytes hole, try to pack */ > > > > > > struct cfg80211_pmsr_ftm_request_peer ftm; /* 36 12 */ > > > > > > /* XXX last struct has 1 byte of padding */ > > > > > > /* size: 48, cachelines: 1, members: 4 */ > > > - /* sum members: 43, holes: 2, sum holes: 5 */ > > > - /* bit holes: 1, sum bit holes: 7 bits */ > > > + /* sum members: 42, holes: 2, sum holes: 5 */ > > > + /* sum bitfield members: 1 bits, bit holes: 1, sum bit holes: 7 bits */ > > > /* paddings: 2, sum paddings: 5 */ > > > /* last cacheline: 48 bytes */ > > > }; > > > > Looks more clear, indeed, thanks for the patch, applying and testing! > > But there are some problems, look at perhaps the struct that got pahole > started, 'struct sock': > > struct callback_head sk_rcu; /* 1232 16 */ > > /* size: 1248, cachelines: 20, members: 85 */ > - /* sum members: 1240, holes: 4, sum holes: 8 */ > + /* sum members: 1235, holes: 4, sum holes: 8 */ > /* last cacheline: 32 bytes */ > }; > > It doesn't add up, before: size = sum_members + sum_holes = 1248 = 1240 + 8 > > Now some 5 bytes went into a black hole, and that is because all the > bitfields here don't have holes, see: > > $ pahole -C sock -F btf examples/vmlinux-aarch64 > struct sock { > struct sock_common __sk_common; /* 0 136 */ > /* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */ > socket_lock_t sk_lock; /* 136 216 */ > /* --- cacheline 5 boundary (320 bytes) was 32 bytes ago --- */ > atomic_t sk_drops; /* 352 4 */ > int sk_rcvlowat; /* 356 4 */ > struct sk_buff_head sk_error_queue; /* 360 96 */ > /* --- cacheline 7 boundary (448 bytes) was 8 bytes ago --- */ > struct sk_buff_head sk_receive_queue; /* 456 96 */ > /* --- cacheline 8 boundary (512 bytes) was 40 bytes ago --- */ > struct { > atomic_t rmem_alloc; /* 552 4 */ > int len; /* 556 4 */ > struct sk_buff * head; /* 560 8 */ > struct sk_buff * tail; /* 568 8 */ > } sk_backlog; /* 552 24 */ > /* --- cacheline 9 boundary (576 bytes) --- */ > int sk_forward_alloc; /* 576 4 */ > unsigned int sk_ll_usec; /* 580 4 */ > unsigned int sk_napi_id; /* 584 4 */ > int sk_rcvbuf; /* 588 4 */ > struct sk_filter * sk_filter; /* 592 8 */ > union { > struct socket_wq * sk_wq; /* 600 8 */ > struct socket_wq * sk_wq_raw; /* 600 8 */ > }; /* 600 8 */ > struct xfrm_policy * sk_policy[2]; /* 608 16 */ > struct dst_entry * sk_rx_dst; /* 624 8 */ > struct dst_entry * sk_dst_cache; /* 632 8 */ > /* --- cacheline 10 boundary (640 bytes) --- */ > atomic_t sk_omem_alloc; /* 640 4 */ > int sk_sndbuf; /* 644 4 */ > int sk_wmem_queued; /* 648 4 */ > refcount_t sk_wmem_alloc; /* 652 4 */ > long unsigned int sk_tsq_flags; /* 656 8 */ > union { > struct sk_buff * sk_send_head; /* 664 8 */ > struct rb_root tcp_rtx_queue; /* 664 8 */ > }; /* 664 8 */ > struct sk_buff_head sk_write_queue; /* 672 96 */ > /* --- cacheline 12 boundary (768 bytes) --- */ > __s32 sk_peek_off; /* 768 4 */ > int sk_write_pending; /* 772 4 */ > __u32 sk_dst_pending_confirm; /* 776 4 */ > u32 sk_pacing_status; /* 780 4 */ > long int sk_sndtimeo; /* 784 8 */ > struct timer_list sk_timer; /* 792 88 */ > /* --- cacheline 13 boundary (832 bytes) was 48 bytes ago --- */ > __u32 sk_priority; /* 880 4 */ > __u32 sk_mark; /* 884 4 */ > long unsigned int sk_pacing_rate; /* 888 8 */ > /* --- cacheline 14 boundary (896 bytes) --- */ > long unsigned int sk_max_pacing_rate; /* 896 8 */ > struct page_frag sk_frag; /* 904 16 */ > netdev_features_t sk_route_caps; /* 920 8 */ > netdev_features_t sk_route_nocaps; /* 928 8 */ > netdev_features_t sk_route_forced_caps; /* 936 8 */ > int sk_gso_type; /* 944 4 */ > unsigned int sk_gso_max_size; /* 948 4 */ > gfp_t sk_allocation; /* 952 4 */ > __u32 sk_txhash; /* 956 4 */ > /* --- cacheline 15 boundary (960 bytes) --- */ > unsigned int __sk_flags_offset[0]; /* 960 0 */ > unsigned int sk_padding:1; /* 960: 0 4 */ > unsigned int sk_kern_sock:1; /* 960: 1 4 */ > unsigned int sk_no_check_tx:1; /* 960: 2 4 */ > unsigned int sk_no_check_rx:1; /* 960: 3 4 */ > unsigned int sk_userlocks:4; /* 960: 4 4 */ > unsigned int sk_protocol:8; /* 960: 8 4 */ > unsigned int sk_type:16; /* 960:16 4 */ > u16 sk_gso_max_segs; /* 964 2 */ > u8 sk_pacing_shift; /* 966 1 */ > > /* XXX 1 byte hole, try to pack */ > > long unsigned int sk_lingertime; /* 968 8 */ > struct proto * sk_prot_creator; /* 976 8 */ > rwlock_t sk_callback_lock; /* 984 72 */ > /* --- cacheline 16 boundary (1024 bytes) was 32 bytes ago --- */ > int sk_err; /* 1056 4 */ > int sk_err_soft; /* 1060 4 */ > u32 sk_ack_backlog; /* 1064 4 */ > u32 sk_max_ack_backlog; /* 1068 4 */ > kuid_t sk_uid; /* 1072 4 */ > > /* XXX 4 bytes hole, try to pack */ > > struct pid * sk_peer_pid; /* 1080 8 */ > /* --- cacheline 17 boundary (1088 bytes) --- */ > const struct cred * sk_peer_cred; /* 1088 8 */ > long int sk_rcvtimeo; /* 1096 8 */ > ktime_t sk_stamp; /* 1104 8 */ > u16 sk_tsflags; /* 1112 2 */ > u8 sk_shutdown; /* 1114 1 */ > > /* XXX 1 byte hole, try to pack */ > > u32 sk_tskey; /* 1116 4 */ > atomic_t sk_zckey; /* 1120 4 */ > u8 sk_clockid; /* 1124 1 */ > u8 sk_txtime_deadline_mode:1; /* 1125: 0 1 */ > u8 sk_txtime_report_errors:1; /* 1125: 1 1 */ > u8 sk_txtime_unused:6; /* 1125: 2 1 */ > > /* XXX 2 bytes hole, try to pack */ > > struct socket * sk_socket; /* 1128 8 */ > void * sk_user_data; /* 1136 8 */ > void * sk_security; /* 1144 8 */ > /* --- cacheline 18 boundary (1152 bytes) --- */ > struct sock_cgroup_data sk_cgrp_data; /* 1152 8 */ > struct mem_cgroup * sk_memcg; /* 1160 8 */ > void (*sk_state_change)(struct sock *); /* 1168 8 */ > void (*sk_data_ready)(struct sock *); /* 1176 8 */ > void (*sk_write_space)(struct sock *); /* 1184 8 */ > void (*sk_error_report)(struct sock *); /* 1192 8 */ > int (*sk_backlog_rcv)(struct sock *, struct sk_buff *); /* 1200 8 */ > struct sk_buff * (*sk_validate_xmit_skb)(struct sock *, struct net_device *, struct sk_buff *); /* 1208 8 */ > /* --- cacheline 19 boundary (1216 bytes) --- */ > void (*sk_destruct)(struct sock *); /* 1216 8 */ > struct sock_reuseport * sk_reuseport_cb; /* 1224 8 */ > struct callback_head sk_rcu; /* 1232 16 */ > > /* size: 1248, cachelines: 20, members: 85 */ > /* sum members: 1235, holes: 4, sum holes: 8 */ > /* last cacheline: 32 bytes */ > }; > > The first bitfield, from sk_padding:1 to sk_type:16 uses 4 bytes, the > second, from sk_txtime_deadline_mode:1 to sk_txtime_unused:6 uses 1 > byte, there we have the 5 missing bytes. > > So we have to show the sum bitfields, not only when there are bit holes, > that way we would see a: > > /* size: 1248, cachelines: 20, members: 85 */ > /* sum members: 1235, holes: 4, sum holes: 8 */ > /* sum bitfield members: 40 bits (5 bytes) */ > /* last cacheline: 32 bytes */ > > With the patch below in place on top of yours, please review, I like this idea! I think we should generalize it to all possible combinations of bit/byte holes. E.g., if there is bit hole, but no byte hole, we probably still want to emit total size of non-bitfield members. I went ahead and produced v2 of this patch, which incorporates idea from your patch below. Please check it out. > > - Arnaldo > > diff --git a/dwarves_fprintf.c b/dwarves_fprintf.c > index cd6c8b9ce7c1..ab427cfcbcce 100644 > --- a/dwarves_fprintf.c > +++ b/dwarves_fprintf.c > @@ -1495,11 +1495,17 @@ static size_t __class__fprintf(struct class *class, const struct cu *cu, > "sum holes: %u */\n", > cconf.indent, tabs, > sum_bytes, class->nr_holes, sum_holes); > - if (sum_bit_holes > 0) > - printed += fprintf(fp, "%.*s/* sum bitfield members: %u bits, bit holes: %d, " > - "sum bit holes: %u bits */\n", > - cconf.indent, tabs, > - sum_bits, class->nr_bit_holes, sum_bit_holes); > + if (sum_bit_holes > 0 || sum_holes > 0) { > + printed += fprintf(fp, "%.*s/* sum bitfield members: %u bits", > + cconf.indent, tabs, sum_bits); > + if (sum_bit_holes > 0) { > + printed += fprintf(fp, ", bit holes: %d, sum bit holes: %u bits", > + class->nr_bit_holes, sum_bit_holes); > + } else { > + printed += fprintf(fp, " (%u bytes)", sum_bits / 8); > + } > + printed += fprintf(fp, " */\n"); > + } > if (class->padding > 0) > printed += fprintf(fp, "%.*s/* padding: %u */\n", > cconf.indent,