On Tue, Sep 24, 2024 at 9:32 AM Xuan Zhuo <xuanzhuo@xxxxxxxxxxxxxxxxx> wrote:
>
> The driver's tx napi is very important for XSK. It is responsible for
> obtaining data from the XSK queue and sending it out.
>
> At the beginning, we need to trigger the tx napi.
>
> virtnet_free_old_xmit() distinguishes the three pointer types (skb, xdp
> frame, xsk buffer) by the last bits of the pointer.
>
> Signed-off-by: Xuan Zhuo <xuanzhuo@xxxxxxxxxxxxxxxxx>
> ---
>  drivers/net/virtio_net.c | 176 ++++++++++++++++++++++++++++++++++++---
>  1 file changed, 166 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 3ad4c6e3ef18..1a870f1df910 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -83,6 +83,7 @@ struct virtnet_sq_free_stats {
>         u64 bytes;
>         u64 napi_packets;
>         u64 napi_bytes;
> +       u64 xsk;
>  };
>
>  struct virtnet_sq_stats {
> @@ -514,16 +515,20 @@ static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb,
>                                                struct sk_buff *curr_skb,
>                                                struct page *page, void *buf,
>                                                int len, int truesize);
> +static void virtnet_xsk_completed(struct send_queue *sq, int num);
>
>  enum virtnet_xmit_type {
>         VIRTNET_XMIT_TYPE_SKB,
>         VIRTNET_XMIT_TYPE_SKB_ORPHAN,
>         VIRTNET_XMIT_TYPE_XDP,
> +       VIRTNET_XMIT_TYPE_XSK,
>  };
>
>  /* We use the last two bits of the pointer to distinguish the xmit type. */
>  #define VIRTNET_XMIT_TYPE_MASK (BIT(0) | BIT(1))
>
> +#define VIRTIO_XSK_FLAG_OFFSET 4

Any reason this is not 2?

> +
>  static enum virtnet_xmit_type virtnet_xmit_ptr_strip(void **ptr)
>  {
>         unsigned long p = (unsigned long)*ptr;
> @@ -546,6 +551,11 @@ static int virtnet_add_outbuf(struct send_queue *sq, int num, void *data,
>                                     GFP_ATOMIC);
>  }
>
> +static u32 virtnet_ptr_to_xsk_buff_len(void *ptr)
> +{
> +       return ((unsigned long)ptr) >> VIRTIO_XSK_FLAG_OFFSET;
> +}
> +
>  static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
>  {
>         sg_assign_page(sg, NULL);
> @@ -587,11 +597,27 @@ static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
>                         stats->bytes += xdp_get_frame_len(frame);
>                         xdp_return_frame(frame);
>                         break;
> +
> +               case VIRTNET_XMIT_TYPE_XSK:
> +                       stats->bytes += virtnet_ptr_to_xsk_buff_len(ptr);
> +                       stats->xsk++;
> +                       break;
>                 }
>         }
>         netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes);

Not related to this patch, but this seems unnecessary for AF_XDP.

>  }
>
> +static void virtnet_free_old_xmit(struct send_queue *sq,
> +                                 struct netdev_queue *txq,
> +                                 bool in_napi,
> +                                 struct virtnet_sq_free_stats *stats)
> +{
> +       __free_old_xmit(sq, txq, in_napi, stats);
> +
> +       if (stats->xsk)
> +               virtnet_xsk_completed(sq, stats->xsk);
> +}
> +
>  /* Converting between virtqueue no. and kernel tx/rx queue no.
>   * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
>   */
> @@ -1019,7 +1045,7 @@ static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
>  {
>         struct virtnet_sq_free_stats stats = {0};
>
> -       __free_old_xmit(sq, txq, in_napi, &stats);
> +       virtnet_free_old_xmit(sq, txq, in_napi, &stats);
>
>         /* Avoid overhead when no packets have been processed
>          * happens when called speculatively from start_xmit.
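To expand on the VIRTIO_XSK_FLAG_OFFSET question above: only the low two
bits are claimed by VIRTNET_XMIT_TYPE_MASK, so as far as I can tell a shift
of 2 would clear the mask just as well; 4 simply leaves bits 2-3 unused.
A quick standalone sketch of the round trip (illustrative only, with
hypothetical helper names, not the driver functions themselves):

#include <stdio.h>

#define XMIT_TYPE_MASK  0x3UL /* mirrors VIRTNET_XMIT_TYPE_MASK: BIT(0) | BIT(1) */
#define XMIT_TYPE_XSK   0x3UL /* fourth entry of enum virtnet_xmit_type above */
#define XSK_FLAG_OFFSET 4     /* the patch uses 4; 2 would also clear the mask */

static void *xsk_len_to_ptr(unsigned long len)
{
        /* pack the length above the type bits, then tag the "pointer" as XSK */
        return (void *)((len << XSK_FLAG_OFFSET) | XMIT_TYPE_XSK);
}

static unsigned long ptr_to_xsk_len(void *ptr)
{
        /* shifting right by the offset drops the tag and recovers the length */
        return (unsigned long)ptr >> XSK_FLAG_OFFSET;
}

int main(void)
{
        void *p = xsk_len_to_ptr(1500);

        /* prints "type=3 len=1500" */
        printf("type=%lx len=%lu\n",
               (unsigned long)p & XMIT_TYPE_MASK, ptr_to_xsk_len(p));
        return 0;
}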
> @@ -1380,6 +1406,111 @@ static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue
>         return err;
>  }
>
> +static void *virtnet_xsk_to_ptr(u32 len)
> +{
> +       unsigned long p;
> +
> +       p = len << VIRTIO_XSK_FLAG_OFFSET;
> +
> +       return virtnet_xmit_ptr_mix((void *)p, VIRTNET_XMIT_TYPE_XSK);
> +}
> +
> +static int virtnet_xsk_xmit_one(struct send_queue *sq,
> +                               struct xsk_buff_pool *pool,
> +                               struct xdp_desc *desc)
> +{
> +       struct virtnet_info *vi;
> +       dma_addr_t addr;
> +
> +       vi = sq->vq->vdev->priv;
> +
> +       addr = xsk_buff_raw_get_dma(pool, desc->addr);
> +       xsk_buff_raw_dma_sync_for_device(pool, addr, desc->len);
> +
> +       sg_init_table(sq->sg, 2);
> +
> +       sg_fill_dma(sq->sg, sq->xsk_hdr_dma_addr, vi->hdr_len);
> +       sg_fill_dma(sq->sg + 1, addr, desc->len);
> +
> +       return virtqueue_add_outbuf(sq->vq, sq->sg, 2,
> +                                   virtnet_xsk_to_ptr(desc->len), GFP_ATOMIC);
> +}
> +
> +static int virtnet_xsk_xmit_batch(struct send_queue *sq,
> +                                 struct xsk_buff_pool *pool,
> +                                 unsigned int budget,
> +                                 u64 *kicks)
> +{
> +       struct xdp_desc *descs = pool->tx_descs;
> +       bool kick = false;
> +       u32 nb_pkts, i;
> +       int err;
> +
> +       budget = min_t(u32, budget, sq->vq->num_free);
> +
> +       nb_pkts = xsk_tx_peek_release_desc_batch(pool, budget);
> +       if (!nb_pkts)
> +               return 0;
> +
> +       for (i = 0; i < nb_pkts; i++) {
> +               err = virtnet_xsk_xmit_one(sq, pool, &descs[i]);
> +               if (unlikely(err)) {
> +                       xsk_tx_completed(sq->xsk_pool, nb_pkts - i);
> +                       break;
> +               }
> +
> +               kick = true;
> +       }
> +
> +       if (kick && virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
> +               (*kicks)++;
> +
> +       return i;
> +}
> +
> +static bool virtnet_xsk_xmit(struct send_queue *sq, struct xsk_buff_pool *pool,
> +                            int budget)
> +{
> +       struct virtnet_info *vi = sq->vq->vdev->priv;
> +       struct virtnet_sq_free_stats stats = {};
> +       struct net_device *dev = vi->dev;
> +       u64 kicks = 0;
> +       int sent;
> +
> +       /* Avoid to wakeup napi meanless, so call __free_old_xmit. */

I don't understand the meaning of this comment.
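(I guess the intent is that virtnet_free_old_xmit() would end up in
virtnet_xsk_completed() -> xsk_wakeup(), i.e. it would re-schedule the tx
napi, which is pointless when this code already runs inside the tx napi,
so this path calls __free_old_xmit() and xsk_tx_completed() directly. If
that is the idea, spelling it out in the comment would help.)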
> +       __free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), true, &stats);
> +
> +       if (stats.xsk)
> +               xsk_tx_completed(sq->xsk_pool, stats.xsk);
> +
> +       sent = virtnet_xsk_xmit_batch(sq, pool, budget, &kicks);
> +
> +       if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq))
> +               check_sq_full_and_disable(vi, vi->dev, sq);
> +
> +       u64_stats_update_begin(&sq->stats.syncp);
> +       u64_stats_add(&sq->stats.packets, stats.packets);
> +       u64_stats_add(&sq->stats.bytes, stats.bytes);
> +       u64_stats_add(&sq->stats.kicks, kicks);
> +       u64_stats_add(&sq->stats.xdp_tx, sent);
> +       u64_stats_update_end(&sq->stats.syncp);
> +
> +       if (xsk_uses_need_wakeup(pool))
> +               xsk_set_tx_need_wakeup(pool);
> +
> +       return sent == budget;
> +}
> +
> +static void xsk_wakeup(struct send_queue *sq)
> +{
> +       if (napi_if_scheduled_mark_missed(&sq->napi))
> +               return;
> +
> +       local_bh_disable();
> +       virtqueue_napi_schedule(&sq->napi, sq->vq);
> +       local_bh_enable();
> +}
> +
>  static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag)
>  {
>         struct virtnet_info *vi = netdev_priv(dev);
> @@ -1393,14 +1524,19 @@ static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag)
>
>         sq = &vi->sq[qid];
>
> -       if (napi_if_scheduled_mark_missed(&sq->napi))
> -               return 0;
> +       xsk_wakeup(sq);
> +       return 0;
> +}
>
> -       local_bh_disable();
> -       virtqueue_napi_schedule(&sq->napi, sq->vq);
> -       local_bh_enable();
> +static void virtnet_xsk_completed(struct send_queue *sq, int num)
> +{
> +       xsk_tx_completed(sq->xsk_pool, num);
>
> -       return 0;
> +       /* If this is called by rx poll, start_xmit and xdp xmit we should
> +        * wakeup the tx napi to consume the xsk tx queue, because the tx
> +        * interrupt may not be triggered.
> +        */
> +       xsk_wakeup(sq);
>  }
>
>  static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
> @@ -1516,8 +1652,8 @@ static int virtnet_xdp_xmit(struct net_device *dev,
>         }
>
>         /* Free up any pending old buffers before queueing new ones. */
> -       __free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq),
> -                       false, &stats);
> +       virtnet_free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq),
> +                             false, &stats);
>
>         for (i = 0; i < n; i++) {
>                 struct xdp_frame *xdpf = frames[i];
> @@ -2961,6 +3097,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
>         struct virtnet_info *vi = sq->vq->vdev->priv;
>         unsigned int index = vq2txq(sq->vq);
>         struct netdev_queue *txq;
> +       bool xsk_busy = false;
>         int opaque;
>         bool done;
>
> @@ -2973,7 +3110,11 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
>         txq = netdev_get_tx_queue(vi->dev, index);
>         __netif_tx_lock(txq, raw_smp_processor_id());
>         virtqueue_disable_cb(sq->vq);
> -       free_old_xmit(sq, txq, !!budget);
> +
> +       if (sq->xsk_pool)
> +               xsk_busy = virtnet_xsk_xmit(sq, sq->xsk_pool, budget);

I think we need a better name than "xsk_busy"; it looks like it means we
exceeded the quota. Or just return the number of buffers sent and let the
caller judge.

Other than that it looks good. With this fixed:

Acked-by: Jason Wang <jasowang@xxxxxxxxxx>

Thanks