Analysis of the structure virtnet_info using pahole gives the following stats. /* size: 256, cachelines: 4, members: 25 */ /* sum members: 245, holes: 3, sum holes: 11 */ /* paddings: 1, sum paddings: 4 */ Reordering the order in which the members of virtnet_info are declared helps in packing byte holes in the middle of virtnet_info, reduces the size required by the structure by 8 bytes, and also allows members to be stored without overstepping the boundaries of a cacheline (for a cacheline of size 64 bytes) unnecessarily. Analysis using pahole post-reordering of members gives the following stats. /* size: 248, cachelines: 4, members: 25 */ /* padding: 3 */ /* paddings: 1, sum paddings: 4 */ /* last cacheline: 56 bytes */ Signed-off-by: Anant Thazhemadam <anant.thazhemadam@xxxxxxxxx> --- The complete analysis done by pahole can be found below. Before the change: struct virtnet_info { struct virtio_device * vdev; /* 0 8 */ struct virtqueue * cvq; /* 8 8 */ struct net_device * dev; /* 16 8 */ struct send_queue * sq; /* 24 8 */ struct receive_queue * rq; /* 32 8 */ unsigned int status; /* 40 4 */ u16 max_queue_pairs; /* 44 2 */ u16 curr_queue_pairs; /* 46 2 */ u16 xdp_queue_pairs; /* 48 2 */ bool big_packets; /* 50 1 */ bool mergeable_rx_bufs; /* 51 1 */ bool has_cvq; /* 52 1 */ bool any_header_sg; /* 53 1 */ u8 hdr_len; /* 54 1 */ /* XXX 1 byte hole, try to pack */ struct delayed_work refill; /* 56 88 */ /* XXX last struct has 4 bytes of padding */ /* --- cacheline 2 boundary (128 bytes) was 16 bytes ago --- */ struct work_struct config_work; /* 144 32 */ bool affinity_hint_set; /* 176 1 */ /* XXX 7 bytes hole, try to pack */ struct hlist_node node; /* 184 16 */ /* --- cacheline 3 boundary (192 bytes) was 8 bytes ago --- */ struct hlist_node node_dead; /* 200 16 */ struct control_buf * ctrl; /* 216 8 */ u8 duplex; /* 224 1 */ /* XXX 3 bytes hole, try to pack */ u32 speed; /* 228 4 */ long unsigned int guest_offloads; /* 232 8 */ long unsigned int 
guest_offloads_capable; /* 240 8 */ struct failover * failover; /* 248 8 */ /* size: 256, cachelines: 4, members: 25 */ /* sum members: 245, holes: 3, sum holes: 11 */ /* paddings: 1, sum paddings: 4 */ }; After the change: struct virtnet_info { struct virtio_device * vdev; /* 0 8 */ struct virtqueue * cvq; /* 8 8 */ struct net_device * dev; /* 16 8 */ struct send_queue * sq; /* 24 8 */ struct receive_queue * rq; /* 32 8 */ unsigned int status; /* 40 4 */ u16 max_queue_pairs; /* 44 2 */ u16 curr_queue_pairs; /* 46 2 */ u16 xdp_queue_pairs; /* 48 2 */ bool big_packets; /* 50 1 */ bool mergeable_rx_bufs; /* 51 1 */ bool has_cvq; /* 52 1 */ bool any_header_sg; /* 53 1 */ bool affinity_hint_set; /* 54 1 */ u8 hdr_len; /* 55 1 */ struct control_buf * ctrl; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ struct work_struct config_work; /* 64 32 */ struct hlist_node node; /* 96 16 */ struct hlist_node node_dead; /* 112 16 */ /* --- cacheline 2 boundary (128 bytes) --- */ long unsigned int guest_offloads; /* 128 8 */ long unsigned int guest_offloads_capable; /* 136 8 */ struct failover * failover; /* 144 8 */ struct delayed_work refill; /* 152 88 */ /* XXX last struct has 4 bytes of padding */ /* --- cacheline 3 boundary (192 bytes) was 48 bytes ago --- */ u32 speed; /* 240 4 */ u8 duplex; /* 244 1 */ /* size: 248, cachelines: 4, members: 25 */ /* padding: 3 */ /* paddings: 1, sum paddings: 4 */ /* last cacheline: 56 bytes */ }; It can be seen that the size has been reduced by 8 bytes, and the holes have been eliminated as well. Also, more members of virtnet_info are accommodated within one cacheline (without unnecessarily crossing over the cacheline boundary). 
drivers/net/virtio_net.c | 42 ++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 263b005981bd..32747f1980ae 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -137,29 +137,29 @@ struct receive_queue { struct napi_struct napi; + /* Name of this receive queue: input.$index */ + char name[40]; + struct bpf_prog __rcu *xdp_prog; struct virtnet_rq_stats stats; + /* RX: fragments + linear part + virtio header */ + struct scatterlist sg[MAX_SKB_FRAGS + 2]; + + /* Page frag for packet buffer allocation. */ + struct page_frag alloc_frag; + /* Chain pages by the private ptr. */ struct page *pages; /* Average packet length for mergeable receive buffers. */ struct ewma_pkt_len mrg_avg_pkt_len; - /* Page frag for packet buffer allocation. */ - struct page_frag alloc_frag; - - /* RX: fragments + linear part + virtio header */ - struct scatterlist sg[MAX_SKB_FRAGS + 2]; + struct xdp_rxq_info xdp_rxq; /* Min single buffer size for mergeable buffers case. */ unsigned int min_buf_len; - - /* Name of this receive queue: input.$index */ - char name[40]; - - struct xdp_rxq_info xdp_rxq; }; /* Control VQ buffers: protected by the rtnl lock */ @@ -202,33 +202,33 @@ struct virtnet_info { /* Host can handle any s/g split between our header and packet data */ bool any_header_sg; + /* Does the affinity hint is set for virtqueues? */ + bool affinity_hint_set; + /* Packet virtio header size */ u8 hdr_len; - /* Work struct for refilling if we run low on memory. */ - struct delayed_work refill; + struct control_buf *ctrl; /* Work struct for config space updates */ struct work_struct config_work; - /* Does the affinity hint is set for virtqueues? 
*/ - bool affinity_hint_set; - /* CPU hotplug instances for online & dead */ struct hlist_node node; struct hlist_node node_dead; - struct control_buf *ctrl; - - /* Ethtool settings */ - u8 duplex; - u32 speed; - unsigned long guest_offloads; unsigned long guest_offloads_capable; /* failover when STANDBY feature enabled */ struct failover *failover; + + /* Work struct for refilling if we run low on memory. */ + struct delayed_work refill; + + /* Ethtool settings */ + u32 speed; + u8 duplex; }; struct padded_vnet_hdr { -- 2.25.1