Re: [PATCH pahole 1/4] dwarves: revert semantics of member bit/byte hole

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Em Wed, Apr 03, 2019 at 12:01:45AM -0700, andrii.nakryiko@xxxxxxxxx escreveu:
> From: Andrii Nakryiko <andriin@xxxxxx>
> 
> pahole --reorganize heavily depends on member's bit_hole and hole fields
> to denote bit/byte holes *after* member. Previous commit
> "dwarves: use bit sizes and bit/byte hole info in __class__fprintf"
> changed its meaning to bit/byte hole *before* member to accomodate
> possible bit/byte holes at the beginning of a struct. This change broke
> reorganization algorithm, though, which is quite involved and isn't
> trivially modifiable to accomodate new semantics.
> 
> This patch reverts the meaning of bit_hole and hole, but also introduces
> per class pre_bit_hole/pre_hole to record initial bit/byte hole of
> a struct. This allows to fix reorg code more easily and still handle
> initial holes cases, if at the expense of being not as elegant.

Yeah, with this back in place the '--packable' option, that reorganizes
all structs to see if we can make some more compact is back working with
a vmlinux for x86, using the BTF info to speed up the process:

[acme@quaco pahole]$ pahole -F btf --packable vmlinux | sort -nr -k4 | head
bts_ctx         12288  8192  4096
swsusp_info	 4096   432  3664
vc_data           792   496   296
pci_dev          2488  2320   168
rcu_state	 3392  3240   152
ptr_ring	  192    40   152
xdp_sock	  960   840   120
zone             1664  1552   112
rcu_data	  576   472   104
rcu_node	  576	480    96
[acme@quaco pahole]$

[acme@quaco pahole]$ time pahole -F btf --packable vmlinux > /dev/null

real	0m0.024s
user	0m0.018s
sys	0m0.006s
[acme@quaco pahole]$

Most are false positives, as DWARF (let alone BTF) has explicit ways to
tell that these are all __attribute__((aligned(something)) and pahole
still don't infer those by realizing that its not a natural alignment, a
heuristic that would catch most of these bigger holes, such as:

[acme@quaco pahole]$ time pahole -F btf -C ptr_ring vmlinux
struct ptr_ring {
	int                        producer;             /*     0     4 */
	spinlock_t                 producer_lock;        /*     4     4 */

	/* XXX 56 bytes hole, try to pack */

	/* --- cacheline 1 boundary (64 bytes) --- */
	int                        consumer_head;        /*    64     4 */
	int                        consumer_tail;        /*    68     4 */
	spinlock_t                 consumer_lock;        /*    72     4 */

	/* XXX 52 bytes hole, try to pack */

	/* --- cacheline 2 boundary (128 bytes) --- */
	int                        size;                 /*   128     4 */
	int                        batch;                /*   132     4 */
	void * *                   queue;                /*   136     8 */

	/* size: 192, cachelines: 3, members: 8 */
	/* sum members: 36, holes: 2, sum holes: 108 */
	/* padding: 48 */
};

real	0m0.027s
user	0m0.016s
sys	0m0.011s
[acme@quaco pahole]$

That, in its original source code has:

include/linux/ptr_ring.h

struct ptr_ring {
        int producer ____cacheline_aligned_in_smp;
        spinlock_t producer_lock;
        int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */
        int consumer_tail; /* next entry to invalidate */
        spinlock_t consumer_lock;
        /* Shared consumer/producer data */
        /* Read-only by both the producer and the consumer */
        int size ____cacheline_aligned_in_smp; /* max entries in queue */
        int batch; /* number of entries to consume in a batch */
        void **queue;
};

So pahole --reorganize will just trow away those
__cacheline_aligned_in_smp it doesn't see:

[acme@quaco pahole]$ time pahole -F btf -C ptr_ring --reorganize vmlinux
struct ptr_ring {
	int                        producer;             /*     0     4 */
	spinlock_t                 producer_lock;        /*     4     4 */
	spinlock_t                 consumer_lock;        /*     8     4 */
	int                        batch;                /*    12     4 */
	void * *                   queue;                /*    16     8 */
	int                        size;                 /*    24     4 */
	int                        consumer_tail;        /*    28     4 */
	int                        consumer_head;        /*    32     4 */

	/* XXX 28 bytes hole, try to pack */

	/* size: 40, cachelines: 1, members: 8 */
	/* sum members: 36, holes: 2, sum holes: 28 */
	/* padding: 4 */
	/* last cacheline: 40 bytes */

	/* BRAIN FART ALERT! 40 bytes != 36 (member bytes) + 0 (member bits) + 28 (byte holes) + 0 (bit holes), diff = -224 bits */
};   /* saved 152 bytes and 2 cachelines! */

real	0m0.051s
user	0m0.035s
sys	0m0.016s
[acme@quaco pahole]$

Which shows it still has some issues, unsure if pre-dating BTF, the same
problem happens with '-F dwarf', will check after applying all the
patches in this kit.

- Arnaldo

 
> Signed-off-by: Andrii Nakryiko <andriin@xxxxxx>
> ---
>  dwarves.c         |  9 ++++--
>  dwarves.h         |  2 ++
>  dwarves_fprintf.c | 72 +++++++++++++++++++++++++++++++----------------
>  3 files changed, 57 insertions(+), 26 deletions(-)
> 
> diff --git a/dwarves.c b/dwarves.c
> index 619dcb3..efa58a9 100644
> --- a/dwarves.c
> +++ b/dwarves.c
> @@ -1260,8 +1260,13 @@ void class__find_holes(struct class *class)
>  			cur_bitfield_end = bit_end;
>  		}
>  
> -		pos->hole = byte_holes;
> -		pos->bit_hole = bit_holes;
> +		if (last) {
> +			last->hole = byte_holes;
> +			last->bit_hole = bit_holes;
> +		} else {
> +			class->pre_hole = byte_holes;
> +			class->pre_bit_hole = bit_holes;
> +		}
>  		if (bit_holes)
>  			class->nr_bit_holes++;
>  		if (byte_holes)
> diff --git a/dwarves.h b/dwarves.h
> index e39871f..46083da 100644
> --- a/dwarves.h
> +++ b/dwarves.h
> @@ -1026,7 +1026,9 @@ struct class {
>  	uint16_t	 nr_vtable_entries;
>  	uint8_t		 nr_holes;
>  	uint8_t		 nr_bit_holes;
> +	uint16_t	 pre_hole;
>  	uint16_t	 padding;
> +	uint8_t		 pre_bit_hole;
>  	uint8_t		 bit_padding;
>  	bool		 holes_searched;
>  	void		 *priv;
> diff --git a/dwarves_fprintf.c b/dwarves_fprintf.c
> index faae3ba..ef53e2b 100644
> --- a/dwarves_fprintf.c
> +++ b/dwarves_fprintf.c
> @@ -1292,6 +1292,30 @@ static size_t __class__fprintf(struct class *class, const struct cu *cu,
>  
>  	printed += fprintf(fp, " {\n");
>  
> +	if (class->pre_bit_hole > 0 && !cconf.suppress_comments) {
> +		if (!newline++) {
> +			fputc('\n', fp);
> +			++printed;
> +		}
> +		printed += fprintf(fp, "%.*s/* XXX %d bit%s hole, "
> +				   "try to pack */\n", cconf.indent, tabs,
> +				   class->pre_bit_hole,
> +				   class->pre_bit_hole != 1 ? "s" : "");
> +		sum_bit_holes += class->pre_bit_hole;
> +	}
> +
> +	if (class->pre_hole > 0 && !cconf.suppress_comments) {
> +		if (!newline++) {
> +			fputc('\n', fp);
> +			++printed;
> +		}
> +		printed += fprintf(fp, "%.*s/* XXX %d byte%s hole, "
> +				   "try to pack */\n",
> +				   cconf.indent, tabs, class->pre_hole,
> +				   class->pre_hole != 1 ? "s" : "");
> +		sum_holes += class->pre_hole;
> +	}
> +
>  	type__for_each_tag(type, tag_pos) {
>  		struct tag *type;
>  		const char *accessibility = tag__accessibility(tag_pos);
> @@ -1361,30 +1385,6 @@ static size_t __class__fprintf(struct class *class, const struct cu *cu,
>  			}
>  		}
>  
> -		if (pos->bit_hole != 0 && !cconf.suppress_comments) {
> -			if (!newline++) {
> -				fputc('\n', fp);
> -				++printed;
> -			}
> -			printed += fprintf(fp, "%.*s/* XXX %d bit%s hole, "
> -					   "try to pack */\n", cconf.indent, tabs,
> -					   pos->bit_hole,
> -					   pos->bit_hole != 1 ? "s" : "");
> -			sum_bit_holes += pos->bit_hole;
> -		}
> -
> -		if (pos->hole > 0 && !cconf.suppress_comments) {
> -			if (!newline++) {
> -				fputc('\n', fp);
> -				++printed;
> -			}
> -			printed += fprintf(fp, "%.*s/* XXX %d byte%s hole, "
> -					   "try to pack */\n",
> -					   cconf.indent, tabs, pos->hole,
> -					   pos->hole != 1 ? "s" : "");
> -			sum_holes += pos->hole;
> -		}
> -
>  		if (newline) {
>  			fputc('\n', fp);
>  			newline = 0;
> @@ -1428,6 +1428,30 @@ static size_t __class__fprintf(struct class *class, const struct cu *cu,
>  			}
>  		}
>  
> +		if (pos->bit_hole != 0 && !cconf.suppress_comments) {
> +			if (!newline++) {
> +				fputc('\n', fp);
> +				++printed;
> +			}
> +			printed += fprintf(fp, "\n%.*s/* XXX %d bit%s hole, "
> +					   "try to pack */", cconf.indent, tabs,
> +					   pos->bit_hole,
> +					   pos->bit_hole != 1 ? "s" : "");
> +			sum_bit_holes += pos->bit_hole;
> +		}
> +
> +		if (pos->hole > 0 && !cconf.suppress_comments) {
> +			if (!newline++) {
> +				fputc('\n', fp);
> +				++printed;
> +			}
> +			printed += fprintf(fp, "\n%.*s/* XXX %d byte%s hole, "
> +					   "try to pack */",
> +					   cconf.indent, tabs, pos->hole,
> +					   pos->hole != 1 ? "s" : "");
> +			sum_holes += pos->hole;
> +		}
> +
>  		fputc('\n', fp);
>  		++printed;
>  
> -- 
> 2.17.1

-- 

- Arnaldo



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux