On Tue, Apr 18, 2023 at 09:04:59PM +0200, Gerhard Engleder wrote:
> Send and complete XSK pool frames within TX NAPI context. NAPI context
> is triggered by ndo_xsk_wakeup.
> 
> Test results with A53 1.2GHz:
> 
> xdpsock txonly copy mode:
>                    pps            pkts           1.00
> tx                 284,409        11,398,144
> Two CPUs with 100% and 10% utilization.
> 
> xdpsock txonly zero-copy mode:
>                    pps            pkts           1.00
> tx                 511,929        5,890,368
> Two CPUs with 100% and 1% utilization.

Hmm, I think the l2fwd ZC numbers should be included here, not in the
previous patch?

> 
> Packet rate increases and CPU utilization is reduced.
> 
> Signed-off-by: Gerhard Engleder <gerhard@xxxxxxxxxxxxxxxxxxxxx>
> ---
>  drivers/net/ethernet/engleder/tsnep.h      |   2 +
>  drivers/net/ethernet/engleder/tsnep_main.c | 127 +++++++++++++++++++--
>  2 files changed, 119 insertions(+), 10 deletions(-)
> 

(...)

> +static int tsnep_xdp_tx_map_zc(struct xdp_desc *xdpd, struct tsnep_tx *tx)
> +{
> +	struct tsnep_tx_entry *entry;
> +	dma_addr_t dma;
> +
> +	entry = &tx->entry[tx->write];
> +	entry->zc = true;
> +
> +	dma = xsk_buff_raw_get_dma(tx->xsk_pool, xdpd->addr);
> +	xsk_buff_raw_dma_sync_for_device(tx->xsk_pool, dma, xdpd->len);
> +
> +	entry->type = TSNEP_TX_TYPE_XSK;
> +	entry->len = xdpd->len;
> +
> +	entry->desc->tx = __cpu_to_le64(dma);
> +
> +	return xdpd->len;
> +}
> +
> +static void tsnep_xdp_xmit_frame_ring_zc(struct xdp_desc *xdpd,
> +					 struct tsnep_tx *tx)
> +{
> +	int length;
> +
> +	length = tsnep_xdp_tx_map_zc(xdpd, tx);
> +
> +	tsnep_tx_activate(tx, tx->write, length, true);
> +	tx->write = (tx->write + 1) & TSNEP_RING_MASK;
> +}
> +
> +static void tsnep_xdp_xmit_zc(struct tsnep_tx *tx)
> +{
> +	int desc_available = tsnep_tx_desc_available(tx);
> +	struct xdp_desc *descs = tx->xsk_pool->tx_descs;
> +	int batch, i;
> +
> +	/* ensure that TX ring is not filled up by XDP, always MAX_SKB_FRAGS
> +	 * will be available for normal TX path and queue is stopped there if
> +	 * necessary
> +	 */
> +	if (desc_available <= (MAX_SKB_FRAGS + 1))
> +		return;
> +	desc_available -= MAX_SKB_FRAGS + 1;
> +
> +	batch = xsk_tx_peek_release_desc_batch(tx->xsk_pool, desc_available);
> +	for (i = 0; i < batch; i++)
> +		tsnep_xdp_xmit_frame_ring_zc(&descs[i], tx);
> +
> +	if (batch) {
> +		/* descriptor properties shall be valid before hardware is
> +		 * notified
> +		 */
> +		dma_wmb();
> +
> +		tsnep_xdp_xmit_flush(tx);
> +	}
> +}
> +
>  static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
>  {
>  	struct tsnep_tx_entry *entry;
>  	struct netdev_queue *nq;
> +	int xsk_frames = 0;
>  	int budget = 128;
>  	int length;
>  	int count;
> @@ -676,7 +771,7 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
>  		if ((entry->type & TSNEP_TX_TYPE_SKB) &&
>  		    skb_shinfo(entry->skb)->nr_frags > 0)
>  			count += skb_shinfo(entry->skb)->nr_frags;
> -		else if (!(entry->type & TSNEP_TX_TYPE_SKB) &&
> +		else if ((entry->type & TSNEP_TX_TYPE_XDP) &&
>  			 xdp_frame_has_frags(entry->xdpf))
>  			count += xdp_get_shared_info_from_frame(entry->xdpf)->nr_frags;
> 
> @@ -705,9 +800,11 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
> 
>  		if (entry->type & TSNEP_TX_TYPE_SKB)
>  			napi_consume_skb(entry->skb, napi_budget);
> -		else
> +		else if (entry->type & TSNEP_TX_TYPE_XDP)
>  			xdp_return_frame_rx_napi(entry->xdpf);
> -		/* xdpf is union with skb */
> +		else
> +			xsk_frames++;
> +		/* xdpf and zc are union with skb */
>  		entry->skb = NULL;
> 
>  		tx->read = (tx->read + count) & TSNEP_RING_MASK;
> @@ -718,6 +815,14 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
>  		budget--;
>  	} while (likely(budget));
> 
> +	if (tx->xsk_pool) {
> +		if (xsk_frames)
> +			xsk_tx_completed(tx->xsk_pool, xsk_frames);
> +		if (xsk_uses_need_wakeup(tx->xsk_pool))
> +			xsk_set_tx_need_wakeup(tx->xsk_pool);
> +		tsnep_xdp_xmit_zc(tx);

It would be good to signal to NAPI whether we are done with the work or
whether there is a need to be rescheduled (when you didn't manage to
consume all of the descs from the XSK Tx ring). See the sketch below.
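Untested sketch of what I mean, modeled on how other ZC drivers handle
this (the bool return is my suggestion, not part of your patch):
tsnep_xdp_xmit_zc() reports whether it drained everything that was
ready, so the caller can fold that into its completion decision.

	static bool tsnep_xdp_xmit_zc(struct tsnep_tx *tx)
	{
		int desc_available = tsnep_tx_desc_available(tx);
		struct xdp_desc *descs = tx->xsk_pool->tx_descs;
		int batch, i;

		/* keep MAX_SKB_FRAGS + 1 descriptors for the normal TX
		 * path, as in your patch
		 */
		if (desc_available <= (MAX_SKB_FRAGS + 1))
			return false;
		desc_available -= MAX_SKB_FRAGS + 1;

		batch = xsk_tx_peek_release_desc_batch(tx->xsk_pool,
						       desc_available);
		for (i = 0; i < batch; i++)
			tsnep_xdp_xmit_frame_ring_zc(&descs[i], tx);

		if (batch) {
			/* descriptor properties shall be valid before
			 * hardware is notified
			 */
			dma_wmb();

			tsnep_xdp_xmit_flush(tx);
		}

		/* if the batch used up every descriptor we offered, more
		 * descs may still sit on the XSK Tx ring, so report that
		 * we are not done yet
		 */
		return batch < desc_available;
	}

tsnep_tx_poll() could then combine this with its own result, e.g.
"complete = tsnep_xdp_xmit_zc(tx) && complete;", so the NAPI poll
function keeps the NAPI scheduled instead of completing it.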
> +	}
> +
>  	if ((tsnep_tx_desc_available(tx) >= ((MAX_SKB_FRAGS + 1) * 2)) &&
>  	    netif_tx_queue_stopped(nq)) {
>  		netif_tx_wake_queue(nq);
> @@ -765,12 +870,6 @@ static int tsnep_tx_open(struct tsnep_tx *tx)
> 
>  static void tsnep_tx_close(struct tsnep_tx *tx)
>  {
> -	u32 val;
> -
> -	readx_poll_timeout(ioread32, tx->addr + TSNEP_CONTROL, val,
> -			   ((val & TSNEP_CONTROL_TX_ENABLE) == 0), 10000,
> -			   1000000);
> -
>  	tsnep_tx_ring_cleanup(tx);
>  }
> 
> @@ -1786,12 +1885,18 @@ static void tsnep_queue_enable(struct tsnep_queue *queue)
>  	napi_enable(&queue->napi);
>  	tsnep_enable_irq(queue->adapter, queue->irq_mask);
> 
> +	if (queue->tx)
> +		tsnep_tx_enable(queue->tx);
> +
>  	if (queue->rx)
>  		tsnep_rx_enable(queue->rx);
>  }
> 
>  static void tsnep_queue_disable(struct tsnep_queue *queue)
>  {
> +	if (queue->tx)
> +		tsnep_tx_disable(queue->tx, &queue->napi);
> +
>  	napi_disable(&queue->napi);
>  	tsnep_disable_irq(queue->adapter, queue->irq_mask);
> 
> @@ -1908,6 +2013,7 @@ int tsnep_enable_xsk(struct tsnep_queue *queue, struct xsk_buff_pool *pool)
>  	if (running)
>  		tsnep_queue_disable(queue);
> 
> +	queue->tx->xsk_pool = pool;
>  	queue->rx->xsk_pool = pool;
> 
>  	if (running) {
> @@ -1928,6 +2034,7 @@ void tsnep_disable_xsk(struct tsnep_queue *queue)
>  	tsnep_rx_free_zc(queue->rx);
> 
>  	queue->rx->xsk_pool = NULL;
> +	queue->tx->xsk_pool = NULL;
> 
>  	if (running) {
>  		tsnep_rx_reopen(queue->rx);
> -- 
> 2.30.2
> 