On Tue, Jan 25, 2022 at 11:58 PM Maciej Fijalkowski <maciej.fijalkowski@xxxxxxxxx> wrote: > > One of the things that commit 5574ff7b7b3d ("i40e: optimize AF_XDP Tx > completion path") introduced was the @xdp_tx_active field. Its usage > from i40e can be adapted to the ice driver and give us positive performance > results. > > If the descriptor that @next_dd points to has been sent by HW (its DD > bit is set), then we are sure that at least a quarter of the ring is ready > to be cleaned. If @xdp_tx_active is 0, which means that the related xdp_ring > is not used for XDP_{TX, REDIRECT} workloads, then we know how many XSK > entries should be placed in the completion queue, IOW walking through the ring > can be skipped. Thanks, Maciej. Acked-by: Magnus Karlsson <magnus.karlsson@xxxxxxxxx> > Reviewed-by: Alexander Lobakin <alexandr.lobakin@xxxxxxxxx> > Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@xxxxxxxxx> > --- > drivers/net/ethernet/intel/ice/ice_txrx.h | 1 + > drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 1 + > drivers/net/ethernet/intel/ice/ice_xsk.c | 15 ++++++++++++--- > 3 files changed, 14 insertions(+), 3 deletions(-) > > diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h > index 666db35a2919..466253ac2ee1 100644 > --- a/drivers/net/ethernet/intel/ice/ice_txrx.h > +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h > @@ -333,6 +333,7 @@ struct ice_tx_ring { > spinlock_t tx_lock; > u32 txq_teid; /* Added Tx queue TEID */ > /* CL4 - 4th cacheline starts here */ > + u16 xdp_tx_active; > #define ICE_TX_FLAGS_RING_XDP BIT(0) > u8 flags; > u8 dcb_tc; /* Traffic class of ring */ > diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c > index 9677cf880a4b..eb21cec1d772 100644 > --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c > +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c > @@ -302,6 +302,7 @@ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring) > 
tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP, 0, > size, 0); > > + xdp_ring->xdp_tx_active++; > i++; > if (i == xdp_ring->count) { > i = 0; > diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c > index 8b6acb4afb7f..2976991c0ab2 100644 > --- a/drivers/net/ethernet/intel/ice/ice_xsk.c > +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c > @@ -687,6 +687,7 @@ static void > ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf) > { > xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf); > + xdp_ring->xdp_tx_active--; > dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma), > dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); > dma_unmap_len_set(tx_buf, len, 0); > @@ -703,9 +704,8 @@ static u16 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring, int napi_budget) > { > u16 tx_thresh = ICE_RING_QUARTER(xdp_ring); > int budget = napi_budget / tx_thresh; > - u16 ntc = xdp_ring->next_to_clean; > u16 next_dd = xdp_ring->next_dd; > - u16 cleared_dds = 0; > + u16 ntc, cleared_dds = 0; > > do { > struct ice_tx_desc *next_dd_desc; > @@ -721,6 +721,12 @@ static u16 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring, int napi_budget) > > cleared_dds++; > xsk_frames = 0; > + if (likely(!xdp_ring->xdp_tx_active)) { > + xsk_frames = tx_thresh; > + goto skip; > + } > + > + ntc = xdp_ring->next_to_clean; > > for (i = 0; i < tx_thresh; i++) { > tx_buf = &xdp_ring->tx_buf[ntc]; > @@ -736,6 +742,10 @@ static u16 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring, int napi_budget) > if (ntc >= xdp_ring->count) > ntc = 0; > } > +skip: > + xdp_ring->next_to_clean += tx_thresh; > + if (xdp_ring->next_to_clean >= desc_cnt) > + xdp_ring->next_to_clean -= desc_cnt; > if (xsk_frames) > xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); > next_dd_desc->cmd_type_offset_bsz = 0; > @@ -744,7 +754,6 @@ static u16 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring, int napi_budget) > next_dd = tx_thresh - 1; > } while 
(budget--); > > - xdp_ring->next_to_clean = ntc; > xdp_ring->next_dd = next_dd; > > return cleared_dds * tx_thresh; > -- > 2.33.1 >