On Thu, Nov 9, 2023 at 7:06 PM Xuan Zhuo <xuanzhuo@xxxxxxxxxxxxxxxxx> wrote: > > On Thu, 9 Nov 2023 14:37:38 +0800, Jason Wang <jasowang@xxxxxxxxxx> wrote: > > On Tue, Nov 7, 2023 at 11:12 AM Xuan Zhuo <xuanzhuo@xxxxxxxxxxxxxxxxx> wrote: > > > > > > If the xsk is enabling, the xsk tx will share the send queue. > > > But the xsk requires that the send queue use the premapped mode. > > > So the send queue must support premapped mode. > > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@xxxxxxxxxxxxxxxxx> > > > --- > > > drivers/net/virtio/main.c | 163 ++++++++++++++++++++++++++++---- > > > drivers/net/virtio/virtio_net.h | 16 ++++ > > > 2 files changed, 163 insertions(+), 16 deletions(-) > > > > > > diff --git a/drivers/net/virtio/main.c b/drivers/net/virtio/main.c > > > index 16e75c08639e..f052db459156 100644 > > > --- a/drivers/net/virtio/main.c > > > +++ b/drivers/net/virtio/main.c > > > @@ -46,6 +46,7 @@ module_param(napi_tx, bool, 0644); > > > #define VIRTIO_XDP_REDIR BIT(1) > > > > > > #define VIRTIO_XDP_FLAG BIT(0) > > > +#define VIRTIO_XMIT_DATA_MASK (VIRTIO_XDP_FLAG) > > > > > > #define VIRTNET_DRIVER_VERSION "1.0.0" > > > > > > @@ -167,6 +168,29 @@ static struct xdp_frame *ptr_to_xdp(void *ptr) > > > return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG); > > > } > > > > > > +static inline void *virtnet_sq_unmap(struct virtnet_sq *sq, void *data) > > > +{ > > > + struct virtnet_sq_dma *next, *head; > > > + > > > + head = (void *)((unsigned long)data & ~VIRTIO_XMIT_DATA_MASK); > > > + > > > + data = head->data; > > > + > > > + while (head) { > > > + virtqueue_dma_unmap_single_attrs(sq->vq, head->addr, head->len, > > > + DMA_TO_DEVICE, 0); > > > + > > > + next = head->next; > > > + > > > + head->next = sq->dmainfo.free; > > > + sq->dmainfo.free = head; > > > + > > > + head = next; > > > + } > > > + > > > + return data; > > > +} > > > + > > > static void __free_old_xmit(struct virtnet_sq *sq, bool in_napi, > > > u64 *bytes, u64 *packets) > > > { > > > @@ -175,14 +199,24 @@ static void __free_old_xmit(struct virtnet_sq *sq, bool in_napi, > > > > > > while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { > > > if (!is_xdp_frame(ptr)) { > > > - struct sk_buff *skb = ptr; > > > + struct sk_buff *skb; > > > + > > > + if (sq->do_dma) > > > + ptr = virtnet_sq_unmap(sq, ptr); > > > + > > > + skb = ptr; > > > > > > pr_debug("Sent skb %p\n", skb); > > > > > > *bytes += skb->len; > > > napi_consume_skb(skb, in_napi); > > > } else { > > > - struct xdp_frame *frame = ptr_to_xdp(ptr); > > > + struct xdp_frame *frame; > > > + > > > + if (sq->do_dma) > > > + ptr = virtnet_sq_unmap(sq, ptr); > > > + > > > + frame = ptr_to_xdp(ptr); > > > > > > *bytes += xdp_get_frame_len(frame); > > > xdp_return_frame(frame); > > > @@ -567,22 +601,104 @@ static void *virtnet_rq_alloc(struct virtnet_rq *rq, u32 size, gfp_t gfp) > > > return buf; > > > } > > > > > > -static void virtnet_rq_set_premapped(struct virtnet_info *vi) > > > +static int virtnet_sq_set_premapped(struct virtnet_sq *sq) > > > { > > > - int i; > > > + struct virtnet_sq_dma *d; > > > + int err, size, i; > > > > > > - /* disable for big mode */ > > > - if (!vi->mergeable_rx_bufs && vi->big_packets) > > > - return; > > > + size = virtqueue_get_vring_size(sq->vq); > > > + > > > + size += MAX_SKB_FRAGS + 2; > > > > Btw, the dmainfo seems per sg? If I'm correct, how can vq_size + > > MAX_SKB_FRAGS + 2 work? > > > We may alloc dmainfo items when the vq is full. So I prepare more dmainfo items. > > > > > > > + > > > + sq->dmainfo.head = kcalloc(size, sizeof(*sq->dmainfo.head), GFP_KERNEL); > > > + if (!sq->dmainfo.head) > > > + return -ENOMEM; > > > + > > > + err = virtqueue_set_dma_premapped(sq->vq); > > > + if (err) { > > > + kfree(sq->dmainfo.head); > > > + return err; > > > + } > > > > Allocating after set_dma_premapped() seems easier. > > Yes. But, we donot has the way to disable premapped mode. > > That is my mistake. :) > > > > > > Btw, is there a benchmark of TX PPS just for this patch to demonstrate > > the impact of the performance? > > We will have that. > > > > > > > + > > > + sq->dmainfo.free = NULL; > > > + > > > + sq->do_dma = true; > > > + > > > + for (i = 0; i < size; ++i) { > > > + d = &sq->dmainfo.head[i]; > > > + > > > + d->next = sq->dmainfo.free; > > > + sq->dmainfo.free = d; > > > + } > > > + > > > + return 0; > > > +} > > > + > > > +static void virtnet_set_premapped(struct virtnet_info *vi) > > > +{ > > > + int i; > > > > > > for (i = 0; i < vi->max_queue_pairs; i++) { > > > - if (virtqueue_set_dma_premapped(vi->rq[i].vq)) > > > - continue; > > > + virtnet_sq_set_premapped(&vi->sq[i]); > > > > > > - vi->rq[i].do_dma = true; > > > + /* disable for big mode */ > > > + if (vi->mergeable_rx_bufs || !vi->big_packets) { > > > + if (!virtqueue_set_dma_premapped(vi->rq[i].vq)) > > > + vi->rq[i].do_dma = true; > > > > How about sticking a virtnet_rq_set_premapped() and calling it here? > > > > It seems more clean. > > OK. > > > > > > Btw, the big mode support for pre mapping is still worthwhile > > regardless whether or not XDP is supported. It has a page pool so we > > can avoid redundant DMA map/unmap there. > > Yes. > > I post other patch set to do this. > > > > > > > + } > > > } > > > } > > > > > > +static struct virtnet_sq_dma *virtnet_sq_map_sg(struct virtnet_sq *sq, int nents, void *data) > > > +{ > > > + struct virtnet_sq_dma *d, *head; > > > + struct scatterlist *sg; > > > + int i; > > > + > > > + head = NULL; > > > + > > > + for_each_sg(sq->sg, sg, nents, i) { > > > + sg->dma_address = virtqueue_dma_map_single_attrs(sq->vq, sg_virt(sg), > > > + sg->length, > > > + DMA_TO_DEVICE, 0); > > > + if (virtqueue_dma_mapping_error(sq->vq, sg->dma_address)) > > > + goto err; > > > + > > > + d = sq->dmainfo.free; > > > + sq->dmainfo.free = d->next; > > > + > > > + d->addr = sg->dma_address; > > > + d->len = sg->length; > > > + > > > + d->next = head; > > > + head = d; > > > + } > > > + > > > + head->data = data; > > > + > > > + return (void *)((unsigned long)head | ((unsigned long)data & VIRTIO_XMIT_DATA_MASK)); > > > > So head contains a pointer to data, any reason we still need to pack a > > data pointer here? > > Maybe you are right. We can skip this. > > > > > > > > > +err: > > > + virtnet_sq_unmap(sq, head); > > > + return NULL; > > > +} > > > + > > > +static int virtnet_add_outbuf(struct virtnet_sq *sq, u32 num, void *data) > > > +{ > > > + int ret; > > > + > > > + if (sq->do_dma) { > > > + data = virtnet_sq_map_sg(sq, num, data); > > > + if (!data) > > > + return -ENOMEM; > > > + } > > > + > > > + ret = virtqueue_add_outbuf(sq->vq, sq->sg, num, data, GFP_ATOMIC); > > > + if (ret && sq->do_dma) > > > + virtnet_sq_unmap(sq, data); > > > + > > > + return ret; > > > +} > > > + > > > static void free_old_xmit(struct virtnet_sq *sq, bool in_napi) > > > { > > > u64 bytes, packets = 0; > > > @@ -686,8 +802,7 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, > > > skb_frag_size(frag), skb_frag_off(frag)); > > > } > > > > > > - err = virtqueue_add_outbuf(sq->vq, sq->sg, nr_frags + 1, > > > - xdp_to_ptr(xdpf), GFP_ATOMIC); > > > + err = virtnet_add_outbuf(sq, nr_frags + 1, xdp_to_ptr(xdpf)); > > > if (unlikely(err)) > > > return -ENOSPC; /* Caller handle free/refcnt */ > > > > > > @@ -2126,7 +2241,8 @@ static int xmit_skb(struct virtnet_sq *sq, struct sk_buff *skb) > > > return num_sg; > > > num_sg++; > > > } > > > - return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC); > > > + > > > + return virtnet_add_outbuf(sq, num_sg, skb); > > > } > > > > > > static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) > > > @@ -3818,6 +3934,8 @@ static void virtnet_free_queues(struct virtnet_info *vi) > > > for (i = 0; i < vi->max_queue_pairs; i++) { > > > __netif_napi_del(&vi->rq[i].napi); > > > __netif_napi_del(&vi->sq[i].napi); > > > + > > > + kfree(vi->sq[i].dmainfo.head); > > > } > > > > > > /* We called __netif_napi_del(), > > > @@ -3866,10 +3984,23 @@ static void free_receive_page_frags(struct virtnet_info *vi) > > > > > > static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf) > > > { > > > - if (!is_xdp_frame(buf)) > > > + struct virtnet_info *vi = vq->vdev->priv; > > > + struct virtnet_sq *sq; > > > + int i = vq2rxq(vq); > > > + > > > + sq = &vi->sq[i]; > > > + > > > + if (!is_xdp_frame(buf)) { > > > + if (sq->do_dma) > > > + buf = virtnet_sq_unmap(sq, buf); > > > + > > > dev_kfree_skb(buf); > > > - else > > > + } else { > > > + if (sq->do_dma) > > > + buf = virtnet_sq_unmap(sq, buf); > > > + > > > xdp_return_frame(ptr_to_xdp(buf)); > > > + } > > > } > > > > > > static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf) > > > @@ -4075,7 +4206,7 @@ static int init_vqs(struct virtnet_info *vi) > > > if (ret) > > > goto err_free; > > > > > > - virtnet_rq_set_premapped(vi); > > > + virtnet_set_premapped(vi); > > > > > > cpus_read_lock(); > > > virtnet_set_affinity(vi); > > > diff --git a/drivers/net/virtio/virtio_net.h b/drivers/net/virtio/virtio_net.h > > > index d814341d9f97..ce806afb6d64 100644 > > > --- a/drivers/net/virtio/virtio_net.h > > > +++ b/drivers/net/virtio/virtio_net.h > > > @@ -48,6 +48,18 @@ struct virtnet_rq_dma { > > > u16 need_sync; > > > }; > > > > > > +struct virtnet_sq_dma { > > > + struct virtnet_sq_dma *next; > > > + dma_addr_t addr; > > > + u32 len; > > > + void *data; > > > > I think we need to seek a way to reuse what has been stored by virtio > > core. It should be much more efficient. > > > Yes. > > But that is for net-next branch. > > Can we do that as a fix after that is merged to 6.8? We still have time. I would like to do it from the start. Thanks > > Thanks. > > > > > > Thanks > > > > > +}; > > > + > > > +struct virtnet_sq_dma_head { > > > + struct virtnet_sq_dma *free; > > > + struct virtnet_sq_dma *head; > > > +}; > > > + > > > /* Internal representation of a send virtqueue */ > > > struct virtnet_sq { > > > /* Virtqueue associated with this virtnet_sq */ > > > @@ -67,6 +79,10 @@ struct virtnet_sq { > > > > > > /* Record whether sq is in reset state. */ > > > bool reset; > > > + > > > + bool do_dma; > > > + > > > + struct virtnet_sq_dma_head dmainfo; > > > }; > > > > > > /* Internal representation of a receive virtqueue */ > > > -- > > > 2.32.0.3.g01195cf9f > > > > > > > >