On Tue, 7 Mar 2023 15:39:45 +0800, Xuan Zhuo <xuanzhuo@xxxxxxxxxxxxxxxxx> wrote: > On Tue, 7 Mar 2023 14:43:42 +0800, Jason Wang <jasowang@xxxxxxxxxx> wrote: > > On Thu, Mar 2, 2023 at 7:59 PM Xuan Zhuo <xuanzhuo@xxxxxxxxxxxxxxxxx> wrote: > > > > > > virtqueue_add_split() only supports virtual addresses, dma is completed > > > in virtqueue_add_split(). > > > > > > In some scenarios (such as the AF_XDP scenario), the memory is allocated > > > and DMA is completed in advance, so it is necessary for us to support > > > passing the DMA address to virtio core. > > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@xxxxxxxxxxxxxxxxx> > > > --- > > > drivers/virtio/virtio_ring.c | 74 +++++++++++++++++++++++++++++++----- > > > include/linux/virtio.h | 5 +++ > > > 2 files changed, 69 insertions(+), 10 deletions(-) > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c > > > index 17520f0d7649..5b186ce73d35 100644 > > > --- a/drivers/virtio/virtio_ring.c > > > +++ b/drivers/virtio/virtio_ring.c > > > @@ -70,6 +70,7 @@ > > > struct vring_desc_state_split { > > > void *data; /* Data for callback. */ > > > struct vring_desc *indir_desc; /* Indirect descriptor, if any. */ > > > + bool dma_map; /* Addr is mapped by virtio core or not. */ > > > > This will stress the cache, can we pack the boolean into indir_desc? > > > Rethink about this, I think we cannot pack this into indir_desc. > Because we may save ctx to indir_desc. > > We can save this info to vring_desc_extra.addr, null means not dma mapped by > virtio core. I think I may have made a mistake. If we check sgs->dma_address to determine whether to do the DMA mapping, then we may not have to refactor virtqueue_add_split/virtqueue_add_packed so much. The code should be simpler. Thanks. > > Thanks. 
> > > > > > > }; > > > > > > struct vring_desc_state_packed { > > > @@ -440,7 +441,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq, > > > } > > > > > > static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, > > > - unsigned int i) > > > + unsigned int i, bool dma_map) > > > { > > > struct vring_desc_extra *extra = vq->split.desc_extra; > > > u16 flags; > > > @@ -457,6 +458,9 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, > > > (flags & VRING_DESC_F_WRITE) ? > > > DMA_FROM_DEVICE : DMA_TO_DEVICE); > > > } else { > > > + if (!dma_map) > > > + goto out; > > > + > > > dma_unmap_page(vring_dma_dev(vq), > > > extra[i].addr, > > > extra[i].len, > > > @@ -751,6 +755,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, > > > unsigned int in_sgs, > > > void *data, > > > void *ctx, > > > + bool dma_map, > > > gfp_t gfp) > > > { > > > struct vring_virtqueue *vq = to_vvq(_vq); > > > @@ -767,9 +772,11 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, > > > goto end; > > > } > > > > > > - err = virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs); > > > - if (err) > > > - goto err; > > > + if (dma_map) { > > > > Could we simply check sg->dma_addr in this case? Then we don't need to > > introduce the dma_map flag. > > > > Thanks > > > > > > > + err = virtqueue_map_sgs(vq, sgs, total_sg, out_sgs, in_sgs); > > > + if (err) > > > + goto err; > > > + } > > > > > > head = vq->free_head; > > > err = virtqueue_add_vring_split(vq, sgs, total_sg, out_sgs, in_sgs, desc); > > > @@ -779,11 +786,13 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, > > > /* Store token and indirect buffer state. */ > > > vq->split.desc_state[head].data = data; > > > vq->split.desc_state[head].indir_desc = desc ? 
desc : ctx; > > > + vq->split.desc_state[head].dma_map = dma_map; > > > > > > goto end; > > > > > > err: > > > - virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs); > > > + if (dma_map) > > > + virtqueue_unmap_sgs(vq, sgs, total_sg, out_sgs, in_sgs); > > > > > > kfree(desc); > > > > > > @@ -828,20 +837,23 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, > > > { > > > unsigned int i, j; > > > __virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT); > > > + bool dma_map; > > > > > > /* Clear data ptr. */ > > > vq->split.desc_state[head].data = NULL; > > > > > > + dma_map = vq->split.desc_state[head].dma_map; > > > + > > > /* Put back on free list: unmap first-level descriptors and find end */ > > > i = head; > > > > > > while (vq->split.vring.desc[i].flags & nextflag) { > > > - vring_unmap_one_split(vq, i); > > > + vring_unmap_one_split(vq, i, dma_map); > > > i = vq->split.desc_extra[i].next; > > > vq->vq.num_free++; > > > } > > > > > > - vring_unmap_one_split(vq, i); > > > + vring_unmap_one_split(vq, i, dma_map); > > > vq->split.desc_extra[i].next = vq->free_head; > > > vq->free_head = head; > > > > > > @@ -863,8 +875,10 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, > > > VRING_DESC_F_INDIRECT)); > > > BUG_ON(len == 0 || len % sizeof(struct vring_desc)); > > > > > > - for (j = 0; j < len / sizeof(struct vring_desc); j++) > > > - vring_unmap_one_split_indirect(vq, &indir_desc[j]); > > > + if (dma_map) { > > > + for (j = 0; j < len / sizeof(struct vring_desc); j++) > > > + vring_unmap_one_split_indirect(vq, &indir_desc[j]); > > > + } > > > > > > kfree(indir_desc); > > > vq->split.desc_state[head].indir_desc = NULL; > > > @@ -2204,7 +2218,22 @@ static inline int virtqueue_add(struct virtqueue *_vq, > > > return vq->packed_ring ? 
virtqueue_add_packed(_vq, sgs, total_sg, > > > out_sgs, in_sgs, data, ctx, gfp) : > > > virtqueue_add_split(_vq, sgs, total_sg, > > > - out_sgs, in_sgs, data, ctx, gfp); > > > + out_sgs, in_sgs, data, ctx, true, gfp); > > > +} > > > + > > > +static inline int virtqueue_add_premapped(struct virtqueue *_vq, > > > + struct scatterlist *sgs[], > > > + unsigned int total_sg, > > > + unsigned int out_sgs, > > > + unsigned int in_sgs, > > > + void *data, > > > + void *ctx, > > > + gfp_t gfp) > > > +{ > > > + struct vring_virtqueue *vq = to_vvq(_vq); > > > + > > > + return virtqueue_add_split(_vq, sgs, total_sg, out_sgs, in_sgs, data, > > > + ctx, false, gfp); > > > } > > > > > > /** > > > @@ -2264,6 +2293,31 @@ int virtqueue_add_outbuf(struct virtqueue *vq, > > > } > > > EXPORT_SYMBOL_GPL(virtqueue_add_outbuf); > > > > > > +/** > > > + * virtqueue_add_outbuf_premapped - expose output buffers with dma address to other end > > > + * @vq: the struct virtqueue we're talking about. > > > + * @sg: scatterlist (must be well-formed and terminated!) > > > + * @num: the number of entries in @sg readable by other side > > > + * @data: the token identifying the buffer. > > > + * @gfp: how to do memory allocations (if necessary). > > > + * > > > + * Caller must ensure we don't call this with other virtqueue operations > > > + * at the same time (except where noted). > > > + * > > > + * It is required that all addrs have completed DMA operations. And use > > > + * sg->dma_address, sg->length to pass addr and length. > > > + * > > > + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO). 
> > > + */ > > > +int virtqueue_add_outbuf_premapped(struct virtqueue *vq, > > > + struct scatterlist *sg, unsigned int num, > > > + void *data, > > > + gfp_t gfp) > > > +{ > > > + return virtqueue_add_premapped(vq, &sg, num, 1, 0, data, NULL, gfp); > > > +} > > > +EXPORT_SYMBOL_GPL(virtqueue_add_outbuf_premapped); > > > + > > > /** > > > * virtqueue_add_inbuf - expose input buffers to other end > > > * @vq: the struct virtqueue we're talking about. > > > diff --git a/include/linux/virtio.h b/include/linux/virtio.h > > > index dcab9c7e8784..d8b472a7dcae 100644 > > > --- a/include/linux/virtio.h > > > +++ b/include/linux/virtio.h > > > @@ -43,6 +43,11 @@ int virtqueue_add_outbuf(struct virtqueue *vq, > > > void *data, > > > gfp_t gfp); > > > > > > +int virtqueue_add_outbuf_premapped(struct virtqueue *vq, > > > + struct scatterlist *sg, unsigned int num, > > > + void *data, > > > + gfp_t gfp); > > > + > > > int virtqueue_add_inbuf(struct virtqueue *vq, > > > struct scatterlist sg[], unsigned int num, > > > void *data, > > > -- > > > 2.32.0.3.g01195cf9f > > > > > > _______________________________________________ > Virtualization mailing list > Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx > https://lists.linuxfoundation.org/mailman/listinfo/virtualization _______________________________________________ Virtualization mailing list Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linuxfoundation.org/mailman/listinfo/virtualization