Re: Differences in pahole output from BTF versus from DWARF

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Em Thu, Jan 10, 2019 at 02:50:32PM -0300, Arnaldo Carvalho de Melo escreveu:
> So, the size for an enumeration in BTF is in bytes, not in bits, as in
> CTF, fixed with the patch below, looking at the other cases now.

We're down to packed data structs, probably in the BTF encoder, see
below.

  $ diff -u /tmp/tcp.o.dwarf.c  /tmp/tcp.o.btf.c
  --- /tmp/tcp.o.dwarf.c	2019-01-10 12:10:26.890521322 -0300
  +++ /tmp/tcp.o.btf.c	2019-01-10 14:46:04.584743337 -0300
  @@ -585,11 +585,17 @@
   	__u16                      vesapm_off;           /*    48     2 */
   	__u16                      pages;                /*    50     2 */
   	__u16                      vesa_attributes;      /*    52     2 */
  -	__u32                      capabilities;         /*    54     4 */
  -	__u32                      ext_lfb_base;         /*    58     4 */
  +	__u32                      capabilities;         /*    52     4 */
  +	__u32                      ext_lfb_base;         /*    56     4 */
  +
  +	/* XXX 2 bytes hole, try to pack */
  +
   	__u8                       _reserved[2];         /*    62     2 */

Were these 4 extra bytes came from? The original struct is:

/*
 * These are set up by the setup-routine at boot-time:
 */

struct screen_info {
	__u8  orig_x;		/* 0x00 */
	__u8  orig_y;		/* 0x01 */
	__u16 ext_mem_k;	/* 0x02 */
	__u16 orig_video_page;	/* 0x04 */
	__u8  orig_video_mode;	/* 0x06 */
	__u8  orig_video_cols;	/* 0x07 */
	__u8  flags;		/* 0x08 */
	__u8  unused2;		/* 0x09 */
	__u16 orig_video_ega_bx;/* 0x0a */
	__u16 unused3;		/* 0x0c */
	__u8  orig_video_lines;	/* 0x0e */
	__u8  orig_video_isVGA;	/* 0x0f */
	__u16 orig_video_points;/* 0x10 */

	/* VESA graphic mode -- linear frame buffer */
	__u16 lfb_width;	/* 0x12 */
	__u16 lfb_height;	/* 0x14 */
	__u16 lfb_depth;	/* 0x16 */
	__u32 lfb_base;		/* 0x18 */
	__u32 lfb_size;		/* 0x1c */
	__u16 cl_magic, cl_offset; /* 0x20 */
	__u16 lfb_linelength;	/* 0x24 */
	__u8  red_size;		/* 0x26 */
	__u8  red_pos;		/* 0x27 */
	__u8  green_size;	/* 0x28 */
	__u8  green_pos;	/* 0x29 */
	__u8  blue_size;	/* 0x2a */
	__u8  blue_pos;		/* 0x2b */
	__u8  rsvd_size;	/* 0x2c */
	__u8  rsvd_pos;		/* 0x2d */
	__u16 vesapm_seg;	/* 0x2e */
	__u16 vesapm_off;	/* 0x30 */
	__u16 pages;		/* 0x32 */
	__u16 vesa_attributes;	/* 0x34 */
	__u32 capabilities;     /* 0x36 */
	__u32 ext_lfb_base;	/* 0x3a */
	__u8  _reserved[2];	/* 0x3e */
} __attribute__((packed));

$ pahole --hex -F dwarf -C screen_info ~/git/build/v4.20-rc5/net/ipv4/tcp.o
struct screen_info {
	__u8                       orig_x;               /*     0   0x1 */
	__u8                       orig_y;               /*   0x1   0x1 */
	__u16                      ext_mem_k;            /*   0x2   0x2 */
	__u16                      orig_video_page;      /*   0x4   0x2 */
	__u8                       orig_video_mode;      /*   0x6   0x1 */
	__u8                       orig_video_cols;      /*   0x7   0x1 */
	__u8                       flags;                /*   0x8   0x1 */
	__u8                       unused2;              /*   0x9   0x1 */
	__u16                      orig_video_ega_bx;    /*   0xa   0x2 */
	__u16                      unused3;              /*   0xc   0x2 */
	__u8                       orig_video_lines;     /*   0xe   0x1 */
	__u8                       orig_video_isVGA;     /*   0xf   0x1 */
	__u16                      orig_video_points;    /*  0x10   0x2 */
	__u16                      lfb_width;            /*  0x12   0x2 */
	__u16                      lfb_height;           /*  0x14   0x2 */
	__u16                      lfb_depth;            /*  0x16   0x2 */
	__u32                      lfb_base;             /*  0x18   0x4 */
	__u32                      lfb_size;             /*  0x1c   0x4 */
	__u16                      cl_magic;             /*  0x20   0x2 */
	__u16                      cl_offset;            /*  0x22   0x2 */
	__u16                      lfb_linelength;       /*  0x24   0x2 */
	__u8                       red_size;             /*  0x26   0x1 */
	__u8                       red_pos;              /*  0x27   0x1 */
	__u8                       green_size;           /*  0x28   0x1 */
	__u8                       green_pos;            /*  0x29   0x1 */
	__u8                       blue_size;            /*  0x2a   0x1 */
	__u8                       blue_pos;             /*  0x2b   0x1 */
	__u8                       rsvd_size;            /*  0x2c   0x1 */
	__u8                       rsvd_pos;             /*  0x2d   0x1 */
	__u16                      vesapm_seg;           /*  0x2e   0x2 */
	__u16                      vesapm_off;           /*  0x30   0x2 */
	__u16                      pages;                /*  0x32   0x2 */
	__u16                      vesa_attributes;      /*  0x34   0x2 */
	__u32                      capabilities;         /*  0x36   0x4 */
	__u32                      ext_lfb_base;         /*  0x3a   0x4 */
	__u8                       _reserved[2];         /*  0x3e   0x2 */

	/* size: 64, cachelines: 1, members: 36 */
};

the dwarf loader/pahole printer seem to be doing the right thing, the
BTF encoder, it seems, has issues with attribute((packed)) structs, see
below.

[acme@quaco pahole]$ pahole --hex -F dwarf -C screen_info ~/git/build/v4.20-rc5/net/ipv4/tcp.o > /tmp/dwarf
[acme@quaco pahole]$ pahole --hex -F btf -C screen_info ~/git/build/v4.20-rc5/net/ipv4/tcp.o > /tmp/btf
[acme@quaco pahole]$ diff -u /tmp/dwarf /tmp/btf
--- /tmp/dwarf	2019-01-10 15:00:14.946723885 -0300
+++ /tmp/btf	2019-01-10 15:00:18.669757798 -0300
@@ -32,9 +32,15 @@
 	__u16                      vesapm_off;           /*  0x30   0x2 */
 	__u16                      pages;                /*  0x32   0x2 */
 	__u16                      vesa_attributes;      /*  0x34   0x2 */
-	__u32                      capabilities;         /*  0x36   0x4 */
+	__u32                      capabilities;         /*  0x34   0x4 */
-	__u32                      ext_lfb_base;         /*  0x3a   0x4 */
+	__u32                      ext_lfb_base;         /*  0x38   0x4 */
+
+	/* XXX 2 bytes hole, try to pack */
+
 	__u8                       _reserved[2];         /*  0x3e   0x2 */

 	/* size: 64, cachelines: 1, members: 36 */
+	/* sum members: 60, holes: 1, sum holes: 2 */
+
+	/* BRAIN FART ALERT! 64 != 60 + 2(holes), diff = 2 */
 };
[acme@quaco pahole]$

Why the BTF encoder or loader is getting this in the wrong place...
Debugging...
   
   	/* size: 64, cachelines: 1, members: 36 */
  +	/* sum members: 60, holes: 1, sum holes: 2 */
  +
  +	/* BRAIN FART ALERT! 64 != 60 + 2(holes), diff = 2 */
   };
   struct apm_bios_info {
   	__u16                      version;              /*     0     2 */
  @@ -630,7 +636,10 @@
   	__u32                      sectors_per_track;    /*    12     4 */
   	__u64                      number_of_sectors;    /*    16     8 */
   	__u16                      bytes_per_sector;     /*    24     2 */
  -	__u32                      dpte_ptr;             /*    26     4 */
  +	__u32                      dpte_ptr;             /*    24     4 */


Looks like the same problem as with screen_info, 'dpte_ptr' got
"unionized" with 'bytes_per_sector'

  +
  +	/* XXX 2 bytes hole, try to pack */
  +
   	__u16                      key;                  /*    30     2 */
   	__u8                       device_path_info_length; /*    32     1 */
   	__u8                       reserved2;            /*    33     1 */
  @@ -683,7 +692,10 @@
   		} atapi;                                 /*    56    16 */
   		struct {
   			__u16      id;                   /*    56     2 */
  -			__u64      lun;                  /*    58     8 */
  +			__u64      lun;                  /*    56     8 */

Ditto, here it could be trying to put 'lun' in its natural alignment,
probably 'struct edd_device_params' is packed like screen_info? Yes...
include/uapi/linux/edd.h line 72

  +
  +			/* XXX 2 bytes hole, try to pack */
  +
   			/* --- cacheline 1 boundary (64 bytes) was 2 bytes ago --- */
   			__u16      reserved1;            /*    66     2 */
   			__u32      reserved2;            /*    68     4 */
  @@ -733,7 +745,10 @@
   	__u8                       checksum;             /*    73     1 */
   
   	/* size: 74, cachelines: 2, members: 18 */
  +	/* sum members: 70, holes: 1, sum holes: 2 */
   	/* last cacheline: 10 bytes */
  +
  +	/* BRAIN FART ALERT! 74 != 70 + 2(holes), diff = 2 */
   };
   struct edd_info {
   	__u8                       device;               /*     0     1 */
  @@ -849,10 +864,13 @@
   };
   struct desc_ptr {
   	short unsigned int         size;                 /*     0     2 */
  -	long unsigned int          address;              /*     2     8 */
  +	long unsigned int          address;              /*     0     8 */

  One more packed struct:

struct desc_ptr {
	unsigned short size;
	unsigned long address;
} __attribute__((packed)) ;
   
   	/* size: 10, cachelines: 1, members: 2 */
  +	/* padding: 2 */
   	/* last cacheline: 10 bytes */
  +
  +	/* BRAIN FART ALERT! 10 != 2 + 0(holes), diff = 8 */
   };
   struct pgprot {
   	pgprotval_t                pgprot;               /*     0     8 */
  @@ -1467,10 +1485,13 @@
   };
   struct x86_hw_tss {

Also a packed struct

   	u32                        reserved1;            /*     0     4 */
  -	u64                        sp0;                  /*     4     8 */
  -	u64                        sp1;                  /*    12     8 */
  -	u64                        sp2;                  /*    20     8 */
  -	u64                        reserved2;            /*    28     8 */
  +	u64                        sp0;                  /*     0     8 */
  +	u64                        sp1;                  /*     8     8 */
  +	u64                        sp2;                  /*    16     8 */
  +	u64                        reserved2;            /*    24     8 */
  +
  +	/* XXX 4 bytes hole, try to pack */
  +
   	u64                        ist[7];               /*    36    56 */
   	/* --- cacheline 1 boundary (64 bytes) was 28 bytes ago --- */
   	u32                        reserved3;            /*    92     4 */
  @@ -1479,7 +1500,10 @@
   	u16                        io_bitmap_base;       /*   102     2 */
   
   	/* size: 104, cachelines: 2, members: 10 */
  +	/* sum members: 96, holes: 1, sum holes: 4 */
   	/* last cacheline: 40 bytes */
  +
  +	/* BRAIN FART ALERT! 104 != 96 + 4(holes), diff = 4 */
   };
   struct seq_operations {
   	void *                     (*start)(struct seq_file *, loff_t *); /*     0     8 */
  

This last one is interesting, was this done on purpose? I think one
level of indirection more and we would keep the original expressiveness.

  @@ -8473,7 +8497,7 @@
   	struct lruvec_stat *       lruvec_stat_cpu;      /*   136     8 */
   	atomic_long_t              lruvec_stat[30];      /*   144   240 */
   	/* --- cacheline 6 boundary (384 bytes) --- */
  -	long unsigned int          lru_zone_size[5][5];  /*   384   200 */
  +	long unsigned int          lru_zone_size[25];    /*   384   200 */
   	/* --- cacheline 9 boundary (576 bytes) was 8 bytes ago --- */
   	struct mem_cgroup_reclaim_iter iter[13];         /*   584   208 */
   	/* --- cacheline 12 boundary (768 bytes) was 24 bytes ago --- */
  
  - Arnaldo



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux