From: Magnus Karlsson <magnus.karlsson@xxxxxxxxx> Introduce one cache line's worth of padding between the producer and consumer pointers in all the lockless rings. This is done so that the HW adjacency prefetcher will not prefetch the consumer pointer when the producer pointer is used and vice versa. This improves throughput performance for the l2fwd sample app by 2% on my machine with HW prefetching turned on. Signed-off-by: Magnus Karlsson <magnus.karlsson@xxxxxxxxx> --- net/xdp/xsk_queue.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index dc1dd5e..3c235d2 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -15,6 +15,10 @@ struct xdp_ring { u32 producer ____cacheline_aligned_in_smp; + /* Hinder the adjacent cache prefetcher to prefetch the consumer pointer if the producer + * pointer is touched and vice versa. + */ + u32 pad ____cacheline_aligned_in_smp; u32 consumer ____cacheline_aligned_in_smp; u32 flags; }; -- 2.7.4