Em Wed, Apr 03, 2019 at 12:01:45AM -0700, andrii.nakryiko@xxxxxxxxx escreveu: > From: Andrii Nakryiko <andriin@xxxxxx> > > pahole --reorganize heavily depends on member's bit_hole and hole fields > to denote bit/byte holes *after* member. Previous commit > "dwarves: use bit sizes and bit/byte hole info in __class__fprintf" > changed its meaning to bit/byte hole *before* member to accomodate > possible bit/byte holes at the beginning of a struct. This change broke > reorganization algorithm, though, which is quite involved and isn't > trivially modifiable to accomodate new semantics. > > This patch reverts the meaning of bit_hole and hole, but also introduces > per class pre_bit_hole/pre_hole to record initial bit/byte hole of > a struct. This allows to fix reorg code more easily and still handle > initial holes cases, if at the expense of being not as elegant. Yeah, with this back in place the '--packable' option, that reorganizes all structs to see if we can make some more compact is back working with a vmlinux for x86, using the BTF info to speed up the process: [acme@quaco pahole]$ pahole -F btf --packable vmlinux | sort -nr -k4 | head bts_ctx 12288 8192 4096 swsusp_info 4096 432 3664 vc_data 792 496 296 pci_dev 2488 2320 168 rcu_state 3392 3240 152 ptr_ring 192 40 152 xdp_sock 960 840 120 zone 1664 1552 112 rcu_data 576 472 104 rcu_node 576 480 96 [acme@quaco pahole]$ [acme@quaco pahole]$ time pahole -F btf --packable vmlinux > /dev/null real 0m0.024s user 0m0.018s sys 0m0.006s [acme@quaco pahole]$ Most are false positives, as DWARF (let alone BTF) has explicit ways to tell that these are all __attribute__((aligned(something)) and pahole still don't infer those by realizing that its not a natural alignment, a heuristic that would catch most of these bigger holes, such as: [acme@quaco pahole]$ time pahole -F btf -C ptr_ring vmlinux struct ptr_ring { int producer; /* 0 4 */ spinlock_t producer_lock; /* 4 4 */ /* XXX 56 bytes hole, try to pack */ /* --- cacheline 1 boundary (64 bytes) --- */ int consumer_head; /* 64 4 */ int consumer_tail; /* 68 4 */ spinlock_t consumer_lock; /* 72 4 */ /* XXX 52 bytes hole, try to pack */ /* --- cacheline 2 boundary (128 bytes) --- */ int size; /* 128 4 */ int batch; /* 132 4 */ void * * queue; /* 136 8 */ /* size: 192, cachelines: 3, members: 8 */ /* sum members: 36, holes: 2, sum holes: 108 */ /* padding: 48 */ }; real 0m0.027s user 0m0.016s sys 0m0.011s [acme@quaco pahole]$ That, in its original source code has: include/linux/ptr_ring.h struct ptr_ring { int producer ____cacheline_aligned_in_smp; spinlock_t producer_lock; int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */ int consumer_tail; /* next entry to invalidate */ spinlock_t consumer_lock; /* Shared consumer/producer data */ /* Read-only by both the producer and the consumer */ int size ____cacheline_aligned_in_smp; /* max entries in queue */ int batch; /* number of entries to consume in a batch */ void **queue; }; So pahole --reorganize will just trow away those __cacheline_aligned_in_smp it doesn't see: [acme@quaco pahole]$ time pahole -F btf -C ptr_ring --reorganize vmlinux struct ptr_ring { int producer; /* 0 4 */ spinlock_t producer_lock; /* 4 4 */ spinlock_t consumer_lock; /* 8 4 */ int batch; /* 12 4 */ void * * queue; /* 16 8 */ int size; /* 24 4 */ int consumer_tail; /* 28 4 */ int consumer_head; /* 32 4 */ /* XXX 28 bytes hole, try to pack */ /* size: 40, cachelines: 1, members: 8 */ /* sum members: 36, holes: 2, sum holes: 28 */ /* padding: 4 */ /* last cacheline: 40 bytes */ /* BRAIN FART ALERT! 40 bytes != 36 (member bytes) + 0 (member bits) + 28 (byte holes) + 0 (bit holes), diff = -224 bits */ }; /* saved 152 bytes and 2 cachelines! */ real 0m0.051s user 0m0.035s sys 0m0.016s [acme@quaco pahole]$ Which shows it still has some issues, unsure if pre-dating BTF, the same problem happens with '-F dwarf', will check after applying all the patches in this kit. - Arnaldo > Signed-off-by: Andrii Nakryiko <andriin@xxxxxx> > --- > dwarves.c | 9 ++++-- > dwarves.h | 2 ++ > dwarves_fprintf.c | 72 +++++++++++++++++++++++++++++++---------------- > 3 files changed, 57 insertions(+), 26 deletions(-) > > diff --git a/dwarves.c b/dwarves.c > index 619dcb3..efa58a9 100644 > --- a/dwarves.c > +++ b/dwarves.c > @@ -1260,8 +1260,13 @@ void class__find_holes(struct class *class) > cur_bitfield_end = bit_end; > } > > - pos->hole = byte_holes; > - pos->bit_hole = bit_holes; > + if (last) { > + last->hole = byte_holes; > + last->bit_hole = bit_holes; > + } else { > + class->pre_hole = byte_holes; > + class->pre_bit_hole = bit_holes; > + } > if (bit_holes) > class->nr_bit_holes++; > if (byte_holes) > diff --git a/dwarves.h b/dwarves.h > index e39871f..46083da 100644 > --- a/dwarves.h > +++ b/dwarves.h > @@ -1026,7 +1026,9 @@ struct class { > uint16_t nr_vtable_entries; > uint8_t nr_holes; > uint8_t nr_bit_holes; > + uint16_t pre_hole; > uint16_t padding; > + uint8_t pre_bit_hole; > uint8_t bit_padding; > bool holes_searched; > void *priv; > diff --git a/dwarves_fprintf.c b/dwarves_fprintf.c > index faae3ba..ef53e2b 100644 > --- a/dwarves_fprintf.c > +++ b/dwarves_fprintf.c > @@ -1292,6 +1292,30 @@ static size_t __class__fprintf(struct class *class, const struct cu *cu, > > printed += fprintf(fp, " {\n"); > > + if (class->pre_bit_hole > 0 && !cconf.suppress_comments) { > + if (!newline++) { > + fputc('\n', fp); > + ++printed; > + } > + printed += fprintf(fp, "%.*s/* XXX %d bit%s hole, " > + "try to pack */\n", cconf.indent, tabs, > + class->pre_bit_hole, > + class->pre_bit_hole != 1 ? "s" : ""); > + sum_bit_holes += class->pre_bit_hole; > + } > + > + if (class->pre_hole > 0 && !cconf.suppress_comments) { > + if (!newline++) { > + fputc('\n', fp); > + ++printed; > + } > + printed += fprintf(fp, "%.*s/* XXX %d byte%s hole, " > + "try to pack */\n", > + cconf.indent, tabs, class->pre_hole, > + class->pre_hole != 1 ? "s" : ""); > + sum_holes += class->pre_hole; > + } > + > type__for_each_tag(type, tag_pos) { > struct tag *type; > const char *accessibility = tag__accessibility(tag_pos); > @@ -1361,30 +1385,6 @@ static size_t __class__fprintf(struct class *class, const struct cu *cu, > } > } > > - if (pos->bit_hole != 0 && !cconf.suppress_comments) { > - if (!newline++) { > - fputc('\n', fp); > - ++printed; > - } > - printed += fprintf(fp, "%.*s/* XXX %d bit%s hole, " > - "try to pack */\n", cconf.indent, tabs, > - pos->bit_hole, > - pos->bit_hole != 1 ? "s" : ""); > - sum_bit_holes += pos->bit_hole; > - } > - > - if (pos->hole > 0 && !cconf.suppress_comments) { > - if (!newline++) { > - fputc('\n', fp); > - ++printed; > - } > - printed += fprintf(fp, "%.*s/* XXX %d byte%s hole, " > - "try to pack */\n", > - cconf.indent, tabs, pos->hole, > - pos->hole != 1 ? "s" : ""); > - sum_holes += pos->hole; > - } > - > if (newline) { > fputc('\n', fp); > newline = 0; > @@ -1428,6 +1428,30 @@ static size_t __class__fprintf(struct class *class, const struct cu *cu, > } > } > > + if (pos->bit_hole != 0 && !cconf.suppress_comments) { > + if (!newline++) { > + fputc('\n', fp); > + ++printed; > + } > + printed += fprintf(fp, "\n%.*s/* XXX %d bit%s hole, " > + "try to pack */", cconf.indent, tabs, > + pos->bit_hole, > + pos->bit_hole != 1 ? "s" : ""); > + sum_bit_holes += pos->bit_hole; > + } > + > + if (pos->hole > 0 && !cconf.suppress_comments) { > + if (!newline++) { > + fputc('\n', fp); > + ++printed; > + } > + printed += fprintf(fp, "\n%.*s/* XXX %d byte%s hole, " > + "try to pack */", > + cconf.indent, tabs, pos->hole, > + pos->hole != 1 ? "s" : ""); > + sum_holes += pos->hole; > + } > + > fputc('\n', fp); > ++printed; > > -- > 2.17.1 -- - Arnaldo