On Wed, Apr 3, 2019 at 5:12 PM Arnaldo Carvalho de Melo <arnaldo.melo@xxxxxxxxx> wrote: > > Em Wed, Apr 03, 2019 at 09:07:47PM -0300, Arnaldo Carvalho de Melo escreveu: > > Em Wed, Apr 03, 2019 at 02:50:14PM -0700, Andrii Nakryiko escreveu: > > > On Wed, Apr 3, 2019 at 2:36 PM Arnaldo Carvalho de Melo <arnaldo.melo@xxxxxxxxx> wrote: > > > > Em Wed, Apr 03, 2019 at 06:06:18PM -0300, Arnaldo Carvalho de Melo escreveu: > > > > [acme@quaco pahole]$ pahole -F btf --packable vmlinux | cut -f 1 | while read struct ; do pahole -F btf -C $struct --reorganize vmlinux | grep BRAIN ; done > > > > /* BRAIN FART ALERT! 200 bytes != 176 (member bytes) + 0 (member bits) + 80 (byte holes) + 0 (bit holes), diff = -448 bits */ > > > > /* BRAIN FART ALERT! 760 bytes != 756 (member bytes) + 0 (member bits) + 4 (byte holes) + 0 (bit holes), diff = -32 bits */ > > > > /* BRAIN FART ALERT! 72 bytes != 28 (member bytes) + 0 (member bits) + 100 (byte holes) + 0 (bit holes), diff = -448 bits */ > > > > /* BRAIN FART ALERT! 40 bytes != 36 (member bytes) + 0 (member bits) + 44 (byte holes) + 0 (bit holes), diff = -352 bits */ > > > > /* BRAIN FART ALERT! 184 bytes != 182 (member bytes) + 0 (member bits) + 6 (byte holes) + 0 (bit holes), diff = -32 bits */ > > > > [acme@quaco pahole]$ pahole -F btf --packable vmlinux | wc -l > > > > 503 > > > > [acme@quaco pahole]$ > > > > > > > > 1% getting closer... 
> > > > > > > > [acme@quaco pahole]$ pahole -F btf --reorganize -C netns_frags vmlinux > > > > struct netns_frags { > > > > long int high_thresh; /* 0 8 */ > > > > long int low_thresh; /* 8 8 */ > > > > int timeout; /* 16 4 */ > > > > int max_dist; /* 20 4 */ > > > > struct inet_frags * f; /* 24 8 */ > > > > atomic_long_t mem; /* 32 8 */ > > > > > > > > /* XXX 24 bytes hole, try to pack */ > > > > > > > > /* --- cacheline 1 boundary (64 bytes) --- */ > > > > struct rhashtable rhashtable; /* 64 136 */ > > > > > > > > /* XXX 56 bytes hole, try to pack */ > > > > > > This one is strange, there should be no hole. Will take a look. > > > > So I've pushed everything to my master branch, removed the last patch of > > your latest series, and have some patches I've worked on to take > > advantage of DW_AT_alignment, and the ones at the end are the 1% of the > > ones that currently are considered --packable. > > > > The problem seems to be related to the last member hole, which should > > probably be just thrown away at the end, together with fixing up the > > class size. > > Mid-air collision: heh :) > > I'm assuming the first three patches are the same as before, with the corrections; only patch 4 is different (and seems to be fixing the rest of BFAs, yay!). Thanks for working on this actively! 
> 4th applied (I had already dropped the 4th of the v1 series), all seems > clear: > > [acme@quaco pahole]$ cat /tmp/BFA | while read struct ; do pahole --reorganize -C $struct -F btf vmlinux ; done > struct netns_frags { > long int high_thresh; /* 0 8 */ > long int low_thresh; /* 8 8 */ > int timeout; /* 16 4 */ > int max_dist; /* 20 4 */ > struct inet_frags * f; /* 24 8 */ > atomic_long_t mem; /* 32 8 */ > > /* XXX 24 bytes hole, try to pack */ > > /* --- cacheline 1 boundary (64 bytes) --- */ > struct rhashtable rhashtable; /* 64 136 */ > > /* size: 200, cachelines: 4, members: 7 */ > /* sum members: 176, holes: 1, sum holes: 24 */ > /* last cacheline: 8 bytes */ > }; /* saved 120 bytes and 1 cacheline! */ > struct worker_pool { > spinlock_t lock; /* 0 4 */ > int cpu; /* 4 4 */ > int node; /* 8 4 */ > int id; /* 12 4 */ > unsigned int flags; /* 16 4 */ > int refcnt; /* 20 4 */ > long unsigned int watchdog_ts; /* 24 8 */ > struct list_head worklist; /* 32 16 */ > int nr_workers; /* 48 4 */ > int nr_idle; /* 52 4 */ > struct list_head idle_list; /* 56 16 */ > /* --- cacheline 1 boundary (64 bytes) was 8 bytes ago --- */ > struct timer_list idle_timer; /* 72 40 */ > > /* XXX last struct has 4 bytes of padding */ > > struct timer_list mayday_timer; /* 112 40 */ > > /* XXX last struct has 4 bytes of padding */ > > /* --- cacheline 2 boundary (128 bytes) was 24 bytes ago --- */ > struct hlist_head busy_hash[64]; /* 152 512 */ > /* --- cacheline 10 boundary (640 bytes) was 24 bytes ago --- */ > struct worker * manager; /* 664 8 */ > struct list_head workers; /* 672 16 */ > struct completion * detach_completion; /* 688 8 */ > struct ida worker_ida; /* 696 16 */ > /* --- cacheline 11 boundary (704 bytes) was 8 bytes ago --- */ > struct workqueue_attrs * attrs; /* 712 8 */ > struct hlist_node hash_node; /* 720 16 */ > struct callback_head rcu; /* 736 16 */ > atomic_t nr_running; /* 752 4 */ > > /* size: 760, cachelines: 12, members: 22 */ > /* padding: 4 */ > /* paddings: 
2, sum paddings: 8 */ > /* last cacheline: 56 bytes */ > }; /* saved 72 bytes and 1 cacheline! */ > struct sbitmap_word { > long unsigned int depth; /* 0 8 */ > spinlock_t swap_lock; /* 8 4 */ > > /* XXX 4 bytes hole, try to pack */ > > long unsigned int cleared; /* 16 8 */ > > /* XXX 40 bytes hole, try to pack */ > > /* --- cacheline 1 boundary (64 bytes) --- */ > long unsigned int word; /* 64 8 */ > > /* size: 72, cachelines: 2, members: 4 */ > /* sum members: 28, holes: 2, sum holes: 44 */ > /* last cacheline: 8 bytes */ > }; /* saved 120 bytes and 1 cacheline! */ > struct ptr_ring { > int producer; /* 0 4 */ > spinlock_t producer_lock; /* 4 4 */ > spinlock_t consumer_lock; /* 8 4 */ > int consumer_tail; /* 12 4 */ > void * * queue; /* 16 8 */ > int batch; /* 24 4 */ > int size; /* 28 4 */ > int consumer_head; /* 32 4 */ > > /* size: 40, cachelines: 1, members: 8 */ > /* padding: 4 */ > /* last cacheline: 40 bytes */ > }; /* saved 152 bytes and 2 cachelines! */ > struct acpi_ec { > acpi_handle handle; /* 0 8 */ > u32 gpe; /* 8 4 */ > bool global_lock; /* 12 1 */ > bool busy_polling; /* 13 1 */ > > /* XXX 2 bytes hole, try to pack */ > > long unsigned int command_addr; /* 16 8 */ > long unsigned int data_addr; /* 24 8 */ > long unsigned int flags; /* 32 8 */ > long unsigned int reference_count; /* 40 8 */ > struct mutex mutex; /* 48 32 */ > /* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */ > wait_queue_head_t wait; /* 80 24 */ > struct list_head list; /* 104 16 */ > struct transaction * curr; /* 120 8 */ > /* --- cacheline 2 boundary (128 bytes) --- */ > spinlock_t lock; /* 128 4 */ > unsigned int polling_guard; /* 132 4 */ > struct work_struct work; /* 136 32 */ > long unsigned int timestamp; /* 168 8 */ > long unsigned int nr_pending_queries; /* 176 8 */ > > /* size: 184, cachelines: 3, members: 17 */ > /* sum members: 182, holes: 1, sum holes: 2 */ > /* last cacheline: 56 bytes */ > }; /* saved 16 bytes and 1 cacheline! */ > [acme@quaco pahole]$