Re: [PATCH] dwarves_fprintf: count bitfield member sizes separately

On Tue, Mar 26, 2019 at 12:18 PM Arnaldo Carvalho de Melo
<arnaldo.melo@xxxxxxxxx> wrote:
>
> Em Tue, Mar 26, 2019 at 03:52:09PM -0300, Arnaldo Carvalho de Melo escreveu:
> > Em Tue, Mar 26, 2019 at 11:30:54AM -0700, andrii.nakryiko@xxxxxxxxx escreveu:
> > > From: Andrii Nakryiko <andriin@xxxxxx>
> > >
> > > Counting member sizes only in bits causes confusion and a lot of
> > > differing output compared to the previous logic. This commit changes
> > > the logic so that the bit size of bitfield members is counted
> > > separately from the byte size of non-bitfield members. At the end, if
> > > there were bit holes, this bit size is emitted explicitly. This keeps
> > > the output identical for structs/unions that use no bitfields, while
> > > preserving correctness (and data completeness) for cases with
> > > bitfields and bit holes.
> > >
> > > Example (-before/+after):
> > >  struct cfg80211_pmsr_request_peer {
> > >         u8                         addr[6];              /*     0     6 */
> > >
> > >         /* XXX 2 bytes hole, try to pack */
> > >
> > >         struct cfg80211_chan_def   chandef;              /*     8    24 */
> > >
> > >         /* XXX last struct has 4 bytes of padding */
> > >
> > >         u8                         report_ap_tsf:1;      /*    32: 0  1 */
> > >
> > >         /* XXX 7 bits hole, try to pack */
> > >         /* XXX 3 bytes hole, try to pack */
> > >
> > >         struct cfg80211_pmsr_ftm_request_peer ftm;       /*    36    12 */
> > >
> > >         /* XXX last struct has 1 byte of padding */
> > >
> > >         /* size: 48, cachelines: 1, members: 4 */
> > > -       /* sum members: 43, holes: 2, sum holes: 5 */
> > > -       /* bit holes: 1, sum bit holes: 7 bits */
> > > +       /* sum members: 42, holes: 2, sum holes: 5 */
> > > +       /* sum bitfield members: 1 bits, bit holes: 1, sum bit holes: 7 bits */
> > >         /* paddings: 2, sum paddings: 5 */
> > >         /* last cacheline: 48 bytes */
> > >  };
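
To make the accounting concrete, here is a minimal sketch of the split
described in the commit message; the struct and field names (member_info,
is_bitfield, bit_size, byte_size) are illustrative only, not the actual
dwarves data structures:

/*
 * Minimal sketch of the accounting split, using hypothetical member
 * fields rather than the real dwarves types.
 */
struct member_info {
	int          is_bitfield;
	unsigned int bit_size;   /* width in bits, for bitfield members */
	unsigned int byte_size;  /* size in bytes, for regular members  */
};

static void sum_member_sizes(const struct member_info *m, int n,
			     unsigned int *sum_bytes, unsigned int *sum_bits)
{
	int i;

	*sum_bytes = 0; /* non-bitfield members, counted in bytes */
	*sum_bits  = 0; /* bitfield members, counted in bits      */

	for (i = 0; i < n; i++) {
		if (m[i].is_bitfield)
			*sum_bits += m[i].bit_size;
		else
			*sum_bytes += m[i].byte_size;
	}
}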
> >
> > Looks clearer, indeed, thanks for the patch, applying and testing!
>
> But there are some problems; look, for instance, at the struct that got
> pahole started, 'struct sock':
>
>         struct callback_head       sk_rcu;               /*  1232    16 */
>
>         /* size: 1248, cachelines: 20, members: 85 */
> -       /* sum members: 1240, holes: 4, sum holes: 8 */
> +       /* sum members: 1235, holes: 4, sum holes: 8 */
>         /* last cacheline: 32 bytes */
>  };
>
> It doesn't add up. Before, size = sum_members + sum_holes = 1248 = 1240 + 8
>
> Now some 5 bytes went into a black hole, and that is because none of the
> bitfields here have bit holes, see:
>
> $ pahole -C sock -F btf examples/vmlinux-aarch64
> struct sock {
>         struct sock_common         __sk_common;          /*     0   136 */
>         /* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
>         socket_lock_t              sk_lock;              /*   136   216 */
>         /* --- cacheline 5 boundary (320 bytes) was 32 bytes ago --- */
>         atomic_t                   sk_drops;             /*   352     4 */
>         int                        sk_rcvlowat;          /*   356     4 */
>         struct sk_buff_head        sk_error_queue;       /*   360    96 */
>         /* --- cacheline 7 boundary (448 bytes) was 8 bytes ago --- */
>         struct sk_buff_head        sk_receive_queue;     /*   456    96 */
>         /* --- cacheline 8 boundary (512 bytes) was 40 bytes ago --- */
>         struct {
>                 atomic_t           rmem_alloc;           /*   552     4 */
>                 int                len;                  /*   556     4 */
>                 struct sk_buff *   head;                 /*   560     8 */
>                 struct sk_buff *   tail;                 /*   568     8 */
>         } sk_backlog;                                    /*   552    24 */
>         /* --- cacheline 9 boundary (576 bytes) --- */
>         int                        sk_forward_alloc;     /*   576     4 */
>         unsigned int               sk_ll_usec;           /*   580     4 */
>         unsigned int               sk_napi_id;           /*   584     4 */
>         int                        sk_rcvbuf;            /*   588     4 */
>         struct sk_filter *         sk_filter;            /*   592     8 */
>         union {
>                 struct socket_wq * sk_wq;                /*   600     8 */
>                 struct socket_wq * sk_wq_raw;            /*   600     8 */
>         };                                               /*   600     8 */
>         struct xfrm_policy *       sk_policy[2];         /*   608    16 */
>         struct dst_entry *         sk_rx_dst;            /*   624     8 */
>         struct dst_entry *         sk_dst_cache;         /*   632     8 */
>         /* --- cacheline 10 boundary (640 bytes) --- */
>         atomic_t                   sk_omem_alloc;        /*   640     4 */
>         int                        sk_sndbuf;            /*   644     4 */
>         int                        sk_wmem_queued;       /*   648     4 */
>         refcount_t                 sk_wmem_alloc;        /*   652     4 */
>         long unsigned int          sk_tsq_flags;         /*   656     8 */
>         union {
>                 struct sk_buff *   sk_send_head;         /*   664     8 */
>                 struct rb_root     tcp_rtx_queue;        /*   664     8 */
>         };                                               /*   664     8 */
>         struct sk_buff_head        sk_write_queue;       /*   672    96 */
>         /* --- cacheline 12 boundary (768 bytes) --- */
>         __s32                      sk_peek_off;          /*   768     4 */
>         int                        sk_write_pending;     /*   772     4 */
>         __u32                      sk_dst_pending_confirm; /*   776     4 */
>         u32                        sk_pacing_status;     /*   780     4 */
>         long int                   sk_sndtimeo;          /*   784     8 */
>         struct timer_list          sk_timer;             /*   792    88 */
>         /* --- cacheline 13 boundary (832 bytes) was 48 bytes ago --- */
>         __u32                      sk_priority;          /*   880     4 */
>         __u32                      sk_mark;              /*   884     4 */
>         long unsigned int          sk_pacing_rate;       /*   888     8 */
>         /* --- cacheline 14 boundary (896 bytes) --- */
>         long unsigned int          sk_max_pacing_rate;   /*   896     8 */
>         struct page_frag           sk_frag;              /*   904    16 */
>         netdev_features_t          sk_route_caps;        /*   920     8 */
>         netdev_features_t          sk_route_nocaps;      /*   928     8 */
>         netdev_features_t          sk_route_forced_caps; /*   936     8 */
>         int                        sk_gso_type;          /*   944     4 */
>         unsigned int               sk_gso_max_size;      /*   948     4 */
>         gfp_t                      sk_allocation;        /*   952     4 */
>         __u32                      sk_txhash;            /*   956     4 */
>         /* --- cacheline 15 boundary (960 bytes) --- */
>         unsigned int               __sk_flags_offset[0]; /*   960     0 */
>         unsigned int               sk_padding:1;         /*   960: 0  4 */
>         unsigned int               sk_kern_sock:1;       /*   960: 1  4 */
>         unsigned int               sk_no_check_tx:1;     /*   960: 2  4 */
>         unsigned int               sk_no_check_rx:1;     /*   960: 3  4 */
>         unsigned int               sk_userlocks:4;       /*   960: 4  4 */
>         unsigned int               sk_protocol:8;        /*   960: 8  4 */
>         unsigned int               sk_type:16;           /*   960:16  4 */
>         u16                        sk_gso_max_segs;      /*   964     2 */
>         u8                         sk_pacing_shift;      /*   966     1 */
>
>         /* XXX 1 byte hole, try to pack */
>
>         long unsigned int          sk_lingertime;        /*   968     8 */
>         struct proto *             sk_prot_creator;      /*   976     8 */
>         rwlock_t                   sk_callback_lock;     /*   984    72 */
>         /* --- cacheline 16 boundary (1024 bytes) was 32 bytes ago --- */
>         int                        sk_err;               /*  1056     4 */
>         int                        sk_err_soft;          /*  1060     4 */
>         u32                        sk_ack_backlog;       /*  1064     4 */
>         u32                        sk_max_ack_backlog;   /*  1068     4 */
>         kuid_t                     sk_uid;               /*  1072     4 */
>
>         /* XXX 4 bytes hole, try to pack */
>
>         struct pid *               sk_peer_pid;          /*  1080     8 */
>         /* --- cacheline 17 boundary (1088 bytes) --- */
>         const struct cred  *       sk_peer_cred;         /*  1088     8 */
>         long int                   sk_rcvtimeo;          /*  1096     8 */
>         ktime_t                    sk_stamp;             /*  1104     8 */
>         u16                        sk_tsflags;           /*  1112     2 */
>         u8                         sk_shutdown;          /*  1114     1 */
>
>         /* XXX 1 byte hole, try to pack */
>
>         u32                        sk_tskey;             /*  1116     4 */
>         atomic_t                   sk_zckey;             /*  1120     4 */
>         u8                         sk_clockid;           /*  1124     1 */
>         u8                         sk_txtime_deadline_mode:1; /*  1125: 0  1 */
>         u8                         sk_txtime_report_errors:1; /*  1125: 1  1 */
>         u8                         sk_txtime_unused:6;   /*  1125: 2  1 */
>
>         /* XXX 2 bytes hole, try to pack */
>
>         struct socket *            sk_socket;            /*  1128     8 */
>         void *                     sk_user_data;         /*  1136     8 */
>         void *                     sk_security;          /*  1144     8 */
>         /* --- cacheline 18 boundary (1152 bytes) --- */
>         struct sock_cgroup_data    sk_cgrp_data;         /*  1152     8 */
>         struct mem_cgroup *        sk_memcg;             /*  1160     8 */
>         void                       (*sk_state_change)(struct sock *); /*  1168     8 */
>         void                       (*sk_data_ready)(struct sock *); /*  1176     8 */
>         void                       (*sk_write_space)(struct sock *); /*  1184     8 */
>         void                       (*sk_error_report)(struct sock *); /*  1192     8 */
>         int                        (*sk_backlog_rcv)(struct sock *, struct sk_buff *); /*  1200     8 */
>         struct sk_buff *           (*sk_validate_xmit_skb)(struct sock *, struct net_device *, struct sk_buff *); /*  1208     8 */
>         /* --- cacheline 19 boundary (1216 bytes) --- */
>         void                       (*sk_destruct)(struct sock *); /*  1216     8 */
>         struct sock_reuseport *    sk_reuseport_cb;      /*  1224     8 */
>         struct callback_head       sk_rcu;               /*  1232    16 */
>
>         /* size: 1248, cachelines: 20, members: 85 */
>         /* sum members: 1235, holes: 4, sum holes: 8 */
>         /* last cacheline: 32 bytes */
> };
>
> The first bitfield run, from sk_padding:1 to sk_type:16, uses 4 bytes;
> the second, from sk_txtime_deadline_mode:1 to sk_txtime_unused:6, uses
> 1 byte. There we have the 5 missing bytes.
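
As a quick standalone sanity check of those numbers (not part of either
patch; the constants are taken from the struct sock output above):

#include <assert.h>

int main(void)
{
	/* bits in the two bitfield runs of struct sock shown above */
	unsigned int first_run  = 1 + 1 + 1 + 1 + 4 + 8 + 16; /* sk_padding .. sk_type */
	unsigned int second_run = 1 + 1 + 6;                   /* sk_txtime_* bitfields */
	unsigned int bitfield_bytes = (first_run + second_run) / 8;

	assert(first_run + second_run == 40);       /* 40 bits of bitfield members   */
	assert(bitfield_bytes == 5);                /* the 5 "missing" bytes         */
	assert(1235 + 8 + bitfield_bytes == 1248);  /* members + holes + bitfields
						       adds up to the size again     */
	return 0;
}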
>
> So we have to show the sum of the bitfield members not only when there
> are bit holes; that way we would see:
>
>         /* size: 1248, cachelines: 20, members: 85 */
>         /* sum members: 1235, holes: 4, sum holes: 8 */
>         /* sum bitfield members: 40 bits (5 bytes) */
>         /* last cacheline: 32 bytes */
>
> With the patch below in place on top of yours, please review,

I like this idea! I think we should generalize it to all possible
combinations of bit/byte holes. E.g., if there is a bit hole but no
byte hole, we probably still want to emit the total size of the
non-bitfield members. I went ahead and produced v2 of this patch,
which incorporates the idea from your patch below. Please check it out.
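
To illustrate what I mean, here is a rough standalone sketch of the
generalized reporting; it is not the actual v2 patch, and the function
name and parameter list are mine, chosen just to keep the example
self-contained:

#include <stdio.h>

/*
 * Rough sketch of the generalization: print the byte summary whenever any
 * kind of hole exists, and the bitfield summary whenever bitfield members
 * contribute bits, appending the bit-hole details only when there are
 * bit holes and the whole-byte equivalent otherwise.
 */
static void print_size_summary(unsigned int sum_bytes, unsigned int nr_holes,
			       unsigned int sum_holes, unsigned int sum_bits,
			       unsigned int nr_bit_holes, unsigned int sum_bit_holes)
{
	if (sum_holes > 0 || sum_bit_holes > 0)
		printf("/* sum members: %u, holes: %u, sum holes: %u */\n",
		       sum_bytes, nr_holes, sum_holes);

	if (sum_bits > 0 && (sum_holes > 0 || sum_bit_holes > 0)) {
		printf("/* sum bitfield members: %u bits", sum_bits);
		if (sum_bit_holes > 0)
			printf(", bit holes: %u, sum bit holes: %u bits",
			       nr_bit_holes, sum_bit_holes);
		else
			printf(" (%u bytes)", sum_bits / 8);
		printf(" */\n");
	}
}

int main(void)
{
	/* the struct sock case above: 4 byte holes totalling 8 bytes,
	 * 40 bits of bitfield members, no bit holes */
	print_size_summary(1235, 4, 8, 40, 0, 0);
	return 0;
}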


>
> - Arnaldo
>
> diff --git a/dwarves_fprintf.c b/dwarves_fprintf.c
> index cd6c8b9ce7c1..ab427cfcbcce 100644
> --- a/dwarves_fprintf.c
> +++ b/dwarves_fprintf.c
> @@ -1495,11 +1495,17 @@ static size_t __class__fprintf(struct class *class, const struct cu *cu,
>                                    "sum holes: %u */\n",
>                                    cconf.indent, tabs,
>                                    sum_bytes, class->nr_holes, sum_holes);
> -       if (sum_bit_holes > 0)
> -               printed += fprintf(fp, "%.*s/* sum bitfield members: %u bits, bit holes: %d, "
> -                                  "sum bit holes: %u bits */\n",
> -                                  cconf.indent, tabs,
> -                                  sum_bits, class->nr_bit_holes, sum_bit_holes);
> +       if (sum_bit_holes > 0 || sum_holes > 0) {
> +               printed += fprintf(fp, "%.*s/* sum bitfield members: %u bits",
> +                                  cconf.indent, tabs, sum_bits);
> +               if (sum_bit_holes > 0) {
> +                       printed += fprintf(fp, ", bit holes: %d, sum bit holes: %u bits",
> +                                          class->nr_bit_holes, sum_bit_holes);
> +               } else {
> +                       printed += fprintf(fp, " (%u bytes)", sum_bits / 8);
> +               }
> +               printed += fprintf(fp, " */\n");
> +       }
>         if (class->padding > 0)
>                 printed += fprintf(fp, "%.*s/* padding: %u */\n",
>                                    cconf.indent,


