On Tue, 24 Sep 2024 15:35:08 +0800, Jason Wang <jasowang@xxxxxxxxxx> wrote:
> On Tue, Sep 24, 2024 at 9:32 AM Xuan Zhuo <xuanzhuo@xxxxxxxxxxxxxxxxx> wrote:
> >
> > The driver's tx napi is very important for XSK. It is responsible for
> > obtaining data from the XSK queue and sending it out.
> >
> > At the beginning, we need to trigger the tx napi.
> >
> > virtnet_free_old_xmit distinguishes the three pointer types (skb, xdp
> > frame, xsk buffer) by the last bits of the pointer.
> >
> > Signed-off-by: Xuan Zhuo <xuanzhuo@xxxxxxxxxxxxxxxxx>
> > ---
> >  drivers/net/virtio_net.c | 176 ++++++++++++++++++++++++++++++++++++---
> >  1 file changed, 166 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index 3ad4c6e3ef18..1a870f1df910 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -83,6 +83,7 @@ struct virtnet_sq_free_stats {
> >          u64 bytes;
> >          u64 napi_packets;
> >          u64 napi_bytes;
> > +        u64 xsk;
> >  };
> >
> >  struct virtnet_sq_stats {
> > @@ -514,16 +515,20 @@ static struct sk_buff *virtnet_skb_append_frag(struct sk_buff *head_skb,
> >                                                 struct sk_buff *curr_skb,
> >                                                 struct page *page, void *buf,
> >                                                 int len, int truesize);
> > +static void virtnet_xsk_completed(struct send_queue *sq, int num);
> >
> >  enum virtnet_xmit_type {
> >          VIRTNET_XMIT_TYPE_SKB,
> >          VIRTNET_XMIT_TYPE_SKB_ORPHAN,
> >          VIRTNET_XMIT_TYPE_XDP,
> > +        VIRTNET_XMIT_TYPE_XSK,
> >  };
> >
> >  /* We use the last two bits of the pointer to distinguish the xmit type. */
> >  #define VIRTNET_XMIT_TYPE_MASK (BIT(0) | BIT(1))
> >
> > +#define VIRTIO_XSK_FLAG_OFFSET 4
>
> Any reason this is not 2?

There's no particular reason for this; any value of 2 or greater will
work, since only the low two bits are claimed by VIRTNET_XMIT_TYPE_MASK.
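
To make the scheme concrete, here is a minimal user-space sketch (my own
standalone code mirroring virtnet_xmit_ptr_mix()/virtnet_xmit_ptr_strip(),
not the driver source): the low two bits of the queued pointer carry the
xmit type, and for XSK there is no real pointer at all -- the buffer
length lives in the bits above VIRTIO_XSK_FLAG_OFFSET.

#include <stdint.h>
#include <stdio.h>

#define VIRTNET_XMIT_TYPE_MASK 0x3UL    /* BIT(0) | BIT(1) */
#define VIRTIO_XSK_FLAG_OFFSET 4

enum virtnet_xmit_type {
        VIRTNET_XMIT_TYPE_SKB,
        VIRTNET_XMIT_TYPE_SKB_ORPHAN,
        VIRTNET_XMIT_TYPE_XDP,
        VIRTNET_XMIT_TYPE_XSK,
};

/* Pack the xmit type into the low bits of a pointer. */
static void *xmit_ptr_mix(void *ptr, enum virtnet_xmit_type type)
{
        return (void *)((uintptr_t)ptr | type);
}

/* Recover the type and restore the original pointer. */
static enum virtnet_xmit_type xmit_ptr_strip(void **ptr)
{
        uintptr_t p = (uintptr_t)*ptr;

        *ptr = (void *)(p & ~VIRTNET_XMIT_TYPE_MASK);
        return p & VIRTNET_XMIT_TYPE_MASK;
}

int main(void)
{
        /* For XSK the "pointer" is just the buffer length, shifted up. */
        uint32_t len = 1500;
        void *ptr = xmit_ptr_mix((void *)((uintptr_t)len << VIRTIO_XSK_FLAG_OFFSET),
                                 VIRTNET_XMIT_TYPE_XSK);

        if (xmit_ptr_strip(&ptr) == VIRTNET_XMIT_TYPE_XSK)
                printf("len = %lu\n",
                       (unsigned long)((uintptr_t)ptr >> VIRTIO_XSK_FLAG_OFFSET));
        return 0;
}

With an offset of 2 the length would simply sit right above the type
mask; 4 just leaves two spare bits.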

> > +
> >  static enum virtnet_xmit_type virtnet_xmit_ptr_strip(void **ptr)
> >  {
> >          unsigned long p = (unsigned long)*ptr;
> >
> > @@ -546,6 +551,11 @@ static int virtnet_add_outbuf(struct send_queue *sq, int num, void *data,
> >                              GFP_ATOMIC);
> >  }
> >
> > +static u32 virtnet_ptr_to_xsk_buff_len(void *ptr)
> > +{
> > +        return ((unsigned long)ptr) >> VIRTIO_XSK_FLAG_OFFSET;
> > +}
> > +
> >  static void sg_fill_dma(struct scatterlist *sg, dma_addr_t addr, u32 len)
> >  {
> >          sg_assign_page(sg, NULL);
> > @@ -587,11 +597,27 @@ static void __free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
> >                          stats->bytes += xdp_get_frame_len(frame);
> >                          xdp_return_frame(frame);
> >                          break;
> > +
> > +                case VIRTNET_XMIT_TYPE_XSK:
> > +                        stats->bytes += virtnet_ptr_to_xsk_buff_len(ptr);
> > +                        stats->xsk++;
> > +                        break;
> >                  }
> >          }
> >          netdev_tx_completed_queue(txq, stats->napi_packets, stats->napi_bytes);
>
> Not related to this patch, but this seems unnecessary to AF_XDP.

Yes. netdev_tx_completed_queue checks napi_bytes first, so I do not think
we need to do anything about this.

> >  }
> >
> > +static void virtnet_free_old_xmit(struct send_queue *sq,
> > +                                  struct netdev_queue *txq,
> > +                                  bool in_napi,
> > +                                  struct virtnet_sq_free_stats *stats)
> > +{
> > +        __free_old_xmit(sq, txq, in_napi, stats);
> > +
> > +        if (stats->xsk)
> > +                virtnet_xsk_completed(sq, stats->xsk);
> > +}
> > +
> >  /* Converting between virtqueue no. and kernel tx/rx queue no.
> >   * 0:rx0 1:tx0 2:rx1 3:tx1 ...
> >   * 2N:rxN 2N+1:txN 2N+2:cvq
> >   */
> > @@ -1019,7 +1045,7 @@ static void free_old_xmit(struct send_queue *sq, struct netdev_queue *txq,
> >  {
> >          struct virtnet_sq_free_stats stats = {0};
> >
> > -        __free_old_xmit(sq, txq, in_napi, &stats);
> > +        virtnet_free_old_xmit(sq, txq, in_napi, &stats);
> >
> >          /* Avoid overhead when no packets have been processed
> >           * happens when called speculatively from start_xmit.
> > @@ -1380,6 +1406,111 @@ static int virtnet_add_recvbuf_xsk(struct virtnet_info *vi, struct receive_queue
> >          return err;
> >  }
> >
> > +static void *virtnet_xsk_to_ptr(u32 len)
> > +{
> > +        unsigned long p;
> > +
> > +        p = len << VIRTIO_XSK_FLAG_OFFSET;
> > +
> > +        return virtnet_xmit_ptr_mix((void *)p, VIRTNET_XMIT_TYPE_XSK);
> > +}
> > +
> > +static int virtnet_xsk_xmit_one(struct send_queue *sq,
> > +                                struct xsk_buff_pool *pool,
> > +                                struct xdp_desc *desc)
> > +{
> > +        struct virtnet_info *vi;
> > +        dma_addr_t addr;
> > +
> > +        vi = sq->vq->vdev->priv;
> > +
> > +        addr = xsk_buff_raw_get_dma(pool, desc->addr);
> > +        xsk_buff_raw_dma_sync_for_device(pool, addr, desc->len);
> > +
> > +        sg_init_table(sq->sg, 2);
> > +
> > +        sg_fill_dma(sq->sg, sq->xsk_hdr_dma_addr, vi->hdr_len);
> > +        sg_fill_dma(sq->sg + 1, addr, desc->len);
> > +
> > +        return virtqueue_add_outbuf(sq->vq, sq->sg, 2,
> > +                                    virtnet_xsk_to_ptr(desc->len), GFP_ATOMIC);
> > +}
> > +
> > +static int virtnet_xsk_xmit_batch(struct send_queue *sq,
> > +                                  struct xsk_buff_pool *pool,
> > +                                  unsigned int budget,
> > +                                  u64 *kicks)
> > +{
> > +        struct xdp_desc *descs = pool->tx_descs;
> > +        bool kick = false;
> > +        u32 nb_pkts, i;
> > +        int err;
> > +
> > +        budget = min_t(u32, budget, sq->vq->num_free);
> > +
> > +        nb_pkts = xsk_tx_peek_release_desc_batch(pool, budget);
> > +        if (!nb_pkts)
> > +                return 0;
> > +
> > +        for (i = 0; i < nb_pkts; i++) {
> > +                err = virtnet_xsk_xmit_one(sq, pool, &descs[i]);
> > +                if (unlikely(err)) {
> > +                        xsk_tx_completed(sq->xsk_pool, nb_pkts - i);
> > +                        break;
> > +                }
> > +
> > +                kick = true;
> > +        }
> > +
> > +        if (kick && virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
> > +                (*kicks)++;
> > +
> > +        return i;
> > +}
> > +
> > +static bool virtnet_xsk_xmit(struct send_queue *sq, struct xsk_buff_pool *pool,
> > +                             int budget)
> > +{
> > +        struct virtnet_info *vi = sq->vq->vdev->priv;
> > +        struct virtnet_sq_free_stats stats = {};
> > +        struct net_device *dev = vi->dev;
> > +        u64 kicks = 0;
> > +        int sent;
> > +
> > +        /* Avoid to wakeup napi meanless, so call __free_old_xmit. */
>
> I don't understand the meaning of this comment.

The comment needs to be more detailed. Here I want to explain why we call
__free_old_xmit directly instead of free_old_xmit.
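
Something like this, perhaps (my own wording of the intent, not final
text for the patch):

        /* We are inside the tx napi here, so use __free_old_xmit directly.
         * virtnet_free_old_xmit would end up in virtnet_xsk_completed(),
         * which tries to wake up this very napi again -- a meaningless
         * wakeup while the napi is already running. Instead, complete the
         * xsk buffers right here with xsk_tx_completed().
         */
        __free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), true, &stats);

        if (stats.xsk)
                xsk_tx_completed(sq->xsk_pool, stats.xsk);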

> > +        __free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq), true, &stats);
> > +
> > +        if (stats.xsk)
> > +                xsk_tx_completed(sq->xsk_pool, stats.xsk);
> > +
> > +        sent = virtnet_xsk_xmit_batch(sq, pool, budget, &kicks);
> > +
> > +        if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq))
> > +                check_sq_full_and_disable(vi, vi->dev, sq);
> > +
> > +        u64_stats_update_begin(&sq->stats.syncp);
> > +        u64_stats_add(&sq->stats.packets, stats.packets);
> > +        u64_stats_add(&sq->stats.bytes, stats.bytes);
> > +        u64_stats_add(&sq->stats.kicks, kicks);
> > +        u64_stats_add(&sq->stats.xdp_tx, sent);
> > +        u64_stats_update_end(&sq->stats.syncp);
> > +
> > +        if (xsk_uses_need_wakeup(pool))
> > +                xsk_set_tx_need_wakeup(pool);
> > +
> > +        return sent == budget;
> > +}
> > +
> > +static void xsk_wakeup(struct send_queue *sq)
> > +{
> > +        if (napi_if_scheduled_mark_missed(&sq->napi))
> > +                return;
> > +
> > +        local_bh_disable();
> > +        virtqueue_napi_schedule(&sq->napi, sq->vq);
> > +        local_bh_enable();
> > +}
> > +
> >  static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag)
> >  {
> >          struct virtnet_info *vi = netdev_priv(dev);
> > @@ -1393,14 +1524,19 @@ static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag)
> >
> >          sq = &vi->sq[qid];
> >
> > -        if (napi_if_scheduled_mark_missed(&sq->napi))
> > -                return 0;
> > +        xsk_wakeup(sq);
> > +        return 0;
> > +}
> >
> > -        local_bh_disable();
> > -        virtqueue_napi_schedule(&sq->napi, sq->vq);
> > -        local_bh_enable();
> > +static void virtnet_xsk_completed(struct send_queue *sq, int num)
> > +{
> > +        xsk_tx_completed(sq->xsk_pool, num);
> >
> > -        return 0;
> > +        /* If this is called by rx poll, start_xmit and xdp xmit we should
> > +         * wakeup the tx napi to consume the xsk tx queue, because the tx
> > +         * interrupt may not be triggered.
> > +         */
> > +        xsk_wakeup(sq);
> >  }
> >
> >  static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
> > @@ -1516,8 +1652,8 @@ static int virtnet_xdp_xmit(struct net_device *dev,
> >          }
> >
> >          /* Free up any pending old buffers before queueing new ones. */
> > -        __free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq),
> > -                        false, &stats);
> > +        virtnet_free_old_xmit(sq, netdev_get_tx_queue(dev, sq - vi->sq),
> > +                              false, &stats);
> >
> >          for (i = 0; i < n; i++) {
> >                  struct xdp_frame *xdpf = frames[i];
> > @@ -2961,6 +3097,7 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
> >          struct virtnet_info *vi = sq->vq->vdev->priv;
> >          unsigned int index = vq2txq(sq->vq);
> >          struct netdev_queue *txq;
> > +        bool xsk_busy = false;
> >          int opaque;
> >          bool done;
> >
> > @@ -2973,7 +3110,11 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget)
> >          txq = netdev_get_tx_queue(vi->dev, index);
> >          __netif_tx_lock(txq, raw_smp_processor_id());
> >          virtqueue_disable_cb(sq->vq);
> > -        free_old_xmit(sq, txq, !!budget);
> > +
> > +        if (sq->xsk_pool)
> > +                xsk_busy = virtnet_xsk_xmit(sq, sq->xsk_pool, budget);
>
> I think we need a better name than "xsk_busy"; it looks like it means we
> exceeded the quota. Or just return the number of buffers sent and let
> the caller judge.

Will fix. Thanks.

> Otherwise looks good.
>
> With this fixed,
>
> Acked-by: Jason Wang <jasowang@xxxxxxxxxx>
>
> Thanks
>