Re: [PATCH pahole 4/4] reorganize: shift tail members for non-bitfields only

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Apr 3, 2019 at 5:12 PM Arnaldo Carvalho de Melo
<arnaldo.melo@xxxxxxxxx> wrote:
>
> Em Wed, Apr 03, 2019 at 09:07:47PM -0300, Arnaldo Carvalho de Melo escreveu:
> > Em Wed, Apr 03, 2019 at 02:50:14PM -0700, Andrii Nakryiko escreveu:
> > > On Wed, Apr 3, 2019 at 2:36 PM Arnaldo Carvalho de Melo <arnaldo.melo@xxxxxxxxx> wrote:
> > > > Em Wed, Apr 03, 2019 at 06:06:18PM -0300, Arnaldo Carvalho de Melo escreveu:
> > > > [acme@quaco pahole]$ pahole -F btf --packable vmlinux  | cut -f 1 | while read struct ; do pahole -F btf -C $struct --reorganize vmlinux | grep BRAIN ; done
> > > >         /* BRAIN FART ALERT! 200 bytes != 176 (member bytes) + 0 (member bits) + 80 (byte holes) + 0 (bit holes), diff = -448 bits */
> > > >         /* BRAIN FART ALERT! 760 bytes != 756 (member bytes) + 0 (member bits) + 4 (byte holes) + 0 (bit holes), diff = -32 bits */
> > > >         /* BRAIN FART ALERT! 72 bytes != 28 (member bytes) + 0 (member bits) + 100 (byte holes) + 0 (bit holes), diff = -448 bits */
> > > >         /* BRAIN FART ALERT! 40 bytes != 36 (member bytes) + 0 (member bits) + 44 (byte holes) + 0 (bit holes), diff = -352 bits */
> > > >         /* BRAIN FART ALERT! 184 bytes != 182 (member bytes) + 0 (member bits) + 6 (byte holes) + 0 (bit holes), diff = -32 bits */
> > > > [acme@quaco pahole]$ pahole -F btf --packable vmlinux  | wc -l
> > > > 503
> > > > [acme@quaco pahole]$
> > > >
> > > > 1% getting closer...
> > > >
> > > > [acme@quaco pahole]$ pahole -F btf --reorganize -C netns_frags vmlinux
> > > > struct netns_frags {
> > > >         long int                   high_thresh;          /*     0     8 */
> > > >         long int                   low_thresh;           /*     8     8 */
> > > >         int                        timeout;              /*    16     4 */
> > > >         int                        max_dist;             /*    20     4 */
> > > >         struct inet_frags *        f;                    /*    24     8 */
> > > >         atomic_long_t              mem;                  /*    32     8 */
> > > >
> > > >         /* XXX 24 bytes hole, try to pack */
> > > >
> > > >         /* --- cacheline 1 boundary (64 bytes) --- */
> > > >         struct rhashtable          rhashtable;           /*    64   136 */
> > > >
> > > >         /* XXX 56 bytes hole, try to pack */
> > >
> > > This one is strange, there should be no hole. Will take a look.
> >
> > So I've pushed everything to my master branch, removed the last patch of
> > your latest series, and have some patches I've worked on to take
> > advantage of DW_AT_alignment, and the ones at the end are the 1% of the
> > ones that currently are considered --packable.
> >
> > The problem seems to be related to the last member hole, which should
> > probably be just thrown away at the end, together with fixing up the
> > class size.
>
> Mid-air collision:

heh :)

>
> I'm assuming the first three patches are the same as before, with the

Correct. Only patch 4 is different (and seems to be fixing the rest of
the BFAs, yay!).

Thanks for working on this actively!

> 4th applied (I had already dropped the 4th of the v1 series), all seems
> clear:
>
> [acme@quaco pahole]$ cat /tmp/BFA | while read struct ; do pahole --reorganize -C $struct -F btf vmlinux ; done
> struct netns_frags {
>         long int                   high_thresh;          /*     0     8 */
>         long int                   low_thresh;           /*     8     8 */
>         int                        timeout;              /*    16     4 */
>         int                        max_dist;             /*    20     4 */
>         struct inet_frags *        f;                    /*    24     8 */
>         atomic_long_t              mem;                  /*    32     8 */
>
>         /* XXX 24 bytes hole, try to pack */
>
>         /* --- cacheline 1 boundary (64 bytes) --- */
>         struct rhashtable          rhashtable;           /*    64   136 */
>
>         /* size: 200, cachelines: 4, members: 7 */
>         /* sum members: 176, holes: 1, sum holes: 24 */
>         /* last cacheline: 8 bytes */
> };   /* saved 120 bytes and 1 cacheline! */
> struct worker_pool {
>         spinlock_t                 lock;                 /*     0     4 */
>         int                        cpu;                  /*     4     4 */
>         int                        node;                 /*     8     4 */
>         int                        id;                   /*    12     4 */
>         unsigned int               flags;                /*    16     4 */
>         int                        refcnt;               /*    20     4 */
>         long unsigned int          watchdog_ts;          /*    24     8 */
>         struct list_head           worklist;             /*    32    16 */
>         int                        nr_workers;           /*    48     4 */
>         int                        nr_idle;              /*    52     4 */
>         struct list_head           idle_list;            /*    56    16 */
>         /* --- cacheline 1 boundary (64 bytes) was 8 bytes ago --- */
>         struct timer_list          idle_timer;           /*    72    40 */
>
>         /* XXX last struct has 4 bytes of padding */
>
>         struct timer_list          mayday_timer;         /*   112    40 */
>
>         /* XXX last struct has 4 bytes of padding */
>
>         /* --- cacheline 2 boundary (128 bytes) was 24 bytes ago --- */
>         struct hlist_head          busy_hash[64];        /*   152   512 */
>         /* --- cacheline 10 boundary (640 bytes) was 24 bytes ago --- */
>         struct worker *            manager;              /*   664     8 */
>         struct list_head           workers;              /*   672    16 */
>         struct completion *        detach_completion;    /*   688     8 */
>         struct ida                 worker_ida;           /*   696    16 */
>         /* --- cacheline 11 boundary (704 bytes) was 8 bytes ago --- */
>         struct workqueue_attrs *   attrs;                /*   712     8 */
>         struct hlist_node          hash_node;            /*   720    16 */
>         struct callback_head       rcu;                  /*   736    16 */
>         atomic_t                   nr_running;           /*   752     4 */
>
>         /* size: 760, cachelines: 12, members: 22 */
>         /* padding: 4 */
>         /* paddings: 2, sum paddings: 8 */
>         /* last cacheline: 56 bytes */
> };   /* saved 72 bytes and 1 cacheline! */
> struct sbitmap_word {
>         long unsigned int          depth;                /*     0     8 */
>         spinlock_t                 swap_lock;            /*     8     4 */
>
>         /* XXX 4 bytes hole, try to pack */
>
>         long unsigned int          cleared;              /*    16     8 */
>
>         /* XXX 40 bytes hole, try to pack */
>
>         /* --- cacheline 1 boundary (64 bytes) --- */
>         long unsigned int          word;                 /*    64     8 */
>
>         /* size: 72, cachelines: 2, members: 4 */
>         /* sum members: 28, holes: 2, sum holes: 44 */
>         /* last cacheline: 8 bytes */
> };   /* saved 120 bytes and 1 cacheline! */
> struct ptr_ring {
>         int                        producer;             /*     0     4 */
>         spinlock_t                 producer_lock;        /*     4     4 */
>         spinlock_t                 consumer_lock;        /*     8     4 */
>         int                        consumer_tail;        /*    12     4 */
>         void * *                   queue;                /*    16     8 */
>         int                        batch;                /*    24     4 */
>         int                        size;                 /*    28     4 */
>         int                        consumer_head;        /*    32     4 */
>
>         /* size: 40, cachelines: 1, members: 8 */
>         /* padding: 4 */
>         /* last cacheline: 40 bytes */
> };   /* saved 152 bytes and 2 cachelines! */
> struct acpi_ec {
>         acpi_handle                handle;               /*     0     8 */
>         u32                        gpe;                  /*     8     4 */
>         bool                       global_lock;          /*    12     1 */
>         bool                       busy_polling;         /*    13     1 */
>
>         /* XXX 2 bytes hole, try to pack */
>
>         long unsigned int          command_addr;         /*    16     8 */
>         long unsigned int          data_addr;            /*    24     8 */
>         long unsigned int          flags;                /*    32     8 */
>         long unsigned int          reference_count;      /*    40     8 */
>         struct mutex               mutex;                /*    48    32 */
>         /* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */
>         wait_queue_head_t          wait;                 /*    80    24 */
>         struct list_head           list;                 /*   104    16 */
>         struct transaction *       curr;                 /*   120     8 */
>         /* --- cacheline 2 boundary (128 bytes) --- */
>         spinlock_t                 lock;                 /*   128     4 */
>         unsigned int               polling_guard;        /*   132     4 */
>         struct work_struct         work;                 /*   136    32 */
>         long unsigned int          timestamp;            /*   168     8 */
>         long unsigned int          nr_pending_queries;   /*   176     8 */
>
>         /* size: 184, cachelines: 3, members: 17 */
>         /* sum members: 182, holes: 1, sum holes: 2 */
>         /* last cacheline: 56 bytes */
> };   /* saved 16 bytes and 1 cacheline! */
> [acme@quaco pahole]$



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux