Analysis of the structure receive_queue using pahole gives the following stats. /* size: 1280, cachelines: 20, members: 11 */ /* sum members: 1220, holes: 1, sum holes: 60 */ /* paddings: 2, sum paddings: 44 */ /* forced alignments: 2, forced holes: 1, sum forced holes: 60 */ Reordering the members of receive_queue helps pack the byte holes in the middle of the structure, and also allows more members to be stored fully within a cacheline (of size 64 bytes) without unnecessarily crossing cacheline boundaries. Analysis using pahole after the reordering of members gives the following stats. /* size: 1280, cachelines: 20, members: 11 */ /* padding: 60 */ /* paddings: 2, sum paddings: 44 */ /* forced alignments: 2 */ Signed-off-by: Anant Thazhemadam <anant.thazhemadam@xxxxxxxxx> --- The complete analysis done by pahole can be found below. Before the change: struct receive_queue { struct virtqueue * vq; /* 0 8 */ struct napi_struct napi __attribute__((__aligned__(8))); /* 8 392 */ /* XXX last struct has 4 bytes of padding */ /* --- cacheline 6 boundary (384 bytes) was 16 bytes ago --- */ struct bpf_prog * xdp_prog; /* 400 8 */ struct virtnet_rq_stats stats; /* 408 64 */ /* --- cacheline 7 boundary (448 bytes) was 24 bytes ago --- */ struct page * pages; /* 472 8 */ struct ewma_pkt_len mrg_avg_pkt_len; /* 480 8 */ struct page_frag alloc_frag; /* 488 16 */ struct scatterlist sg[19]; /* 504 608 */ /* --- cacheline 17 boundary (1088 bytes) was 24 bytes ago --- */ unsigned int min_buf_len; /* 1112 4 */ char name[40]; /* 1116 40 */ /* XXX 60 bytes hole, try to pack */ /* --- cacheline 19 boundary (1216 bytes) --- */ struct xdp_rxq_info xdp_rxq __attribute__((__aligned__(64))); /* 1216 64 */ /* XXX last struct has 40 bytes of padding */ /* size: 1280, cachelines: 20, members: 11 */ /* sum members: 1220, holes: 1, sum holes: 60 */ /* paddings: 2, sum paddings: 44 */ /* forced alignments: 2, forced holes: 1, sum forced holes: 60 */ } 
__attribute__((__aligned__(64))); After the change: struct receive_queue { struct virtqueue * vq; /* 0 8 */ struct napi_struct napi __attribute__((__aligned__(8))); /* 8 392 */ /* XXX last struct has 4 bytes of padding */ /* --- cacheline 6 boundary (384 bytes) was 16 bytes ago --- */ char name[40]; /* 400 40 */ struct bpf_prog * xdp_prog; /* 440 8 */ /* --- cacheline 7 boundary (448 bytes) --- */ struct virtnet_rq_stats stats; /* 448 64 */ /* --- cacheline 8 boundary (512 bytes) --- */ struct scatterlist sg[19]; /* 512 608 */ /* --- cacheline 17 boundary (1088 bytes) was 32 bytes ago --- */ struct page_frag alloc_frag; /* 1120 16 */ struct page * pages; /* 1136 8 */ struct ewma_pkt_len mrg_avg_pkt_len; /* 1144 8 */ /* --- cacheline 18 boundary (1152 bytes) --- */ struct xdp_rxq_info xdp_rxq __attribute__((__aligned__(64))); /* 1152 64 */ /* XXX last struct has 40 bytes of padding */ /* --- cacheline 19 boundary (1216 bytes) --- */ unsigned int min_buf_len; /* 1216 4 */ /* size: 1280, cachelines: 20, members: 11 */ /* padding: 60 */ /* paddings: 2, sum paddings: 44 */ /* forced alignments: 2 */ } __attribute__((__aligned__(64))); It can be observed that the holes have been eliminated. Also, more members of receive_queue are accommodated within a cacheline (instead of unnecessarily crossing over the cacheline boundary). There are 60 bytes of padding at the end, since min_buf_len is only 4 bytes in size and the structure's alignment is forced to 64 bytes by xdp_rxq. If min_buf_len were declared anywhere other than at the end, a 60-byte hole would open up again. 
drivers/net/virtio_net.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index f7bd85001cf0..b52db0b4879a 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -137,29 +137,29 @@ struct receive_queue { struct napi_struct napi; + /* Name of this receive queue: input.$index */ + char name[40]; + struct bpf_prog __rcu *xdp_prog; struct virtnet_rq_stats stats; + /* RX: fragments + linear part + virtio header */ + struct scatterlist sg[MAX_SKB_FRAGS + 2]; + + /* Page frag for packet buffer allocation. */ + struct page_frag alloc_frag; + /* Chain pages by the private ptr. */ struct page *pages; /* Average packet length for mergeable receive buffers. */ struct ewma_pkt_len mrg_avg_pkt_len; - /* Page frag for packet buffer allocation. */ - struct page_frag alloc_frag; - - /* RX: fragments + linear part + virtio header */ - struct scatterlist sg[MAX_SKB_FRAGS + 2]; + struct xdp_rxq_info xdp_rxq; /* Min single buffer size for mergeable buffers case. */ unsigned int min_buf_len; - - /* Name of this receive queue: input.$index */ - char name[40]; - - struct xdp_rxq_info xdp_rxq; }; /* Control VQ buffers: protected by the rtnl lock */ -- 2.25.1