On Thu, Feb 06, 2025 at 07:26:27PM +0100, Alexander Lobakin wrote:
> i40e, as well as ice, has a custom loop unrolling macro for unrolling
> Tx descriptors filling on XSk xmit.
> Replace i40e defs with generic unrolled_count(), which is also more
> convenient as it allows passing defines as its argument, not hardcoded
> values, while the loop declaration will still be a usual for-loop.
>
> Signed-off-by: Alexander Lobakin <aleksander.lobakin@xxxxxxxxx>

Acked-by: Maciej Fijalkowski <maciej.fijalkowski@xxxxxxxxx>

> ---
>  drivers/net/ethernet/intel/i40e/i40e_xsk.h | 10 +---------
>  drivers/net/ethernet/intel/i40e/i40e_xsk.c |  4 +++-
>  2 files changed, 4 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> index ef156fad52f2..dd16351a7af8 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> @@ -6,7 +6,7 @@
>
>  #include <linux/types.h>
>
> -/* This value should match the pragma in the loop_unrolled_for
> +/* This value should match the pragma in the unrolled_count()
>   * macro. Why 4? It is strictly empirical. It seems to be a good
>   * compromise between the advantage of having simultaneous outstanding
>   * reads to the DMA array that can hide each others latency and the
> @@ -14,14 +14,6 @@
>   */
>  #define PKTS_PER_BATCH 4
>
> -#ifdef __clang__
> -#define loop_unrolled_for _Pragma("clang loop unroll_count(4)") for
> -#elif __GNUC__ >= 8
> -#define loop_unrolled_for _Pragma("GCC unroll 4") for
> -#else
> -#define loop_unrolled_for for
> -#endif
> -
>  struct i40e_ring;
>  struct i40e_vsi;
>  struct net_device;
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> index e28f1905a4a0..9f47388eaba5 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> @@ -2,6 +2,7 @@
>  /* Copyright(c) 2018 Intel Corporation. */
>
>  #include <linux/bpf_trace.h>
> +#include <linux/unroll.h>
>  #include <net/xdp_sock_drv.h>
>  #include "i40e_txrx_common.h"
>  #include "i40e_xsk.h"
> @@ -529,7 +530,8 @@ static void i40e_xmit_pkt_batch(struct i40e_ring *xdp_ring, struct xdp_desc *des
>  	dma_addr_t dma;
>  	u32 i;
>
> -	loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) {
> +	unrolled_count(PKTS_PER_BATCH)
> +	for (i = 0; i < PKTS_PER_BATCH; i++) {
>  		u32 cmd = I40E_TX_DESC_CMD_ICRC | xsk_is_eop_desc(&desc[i]);
>
>  		dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc[i].addr);
> --
> 2.48.1
>