在 2022/2/10 下午4:51, Xuan Zhuo 写道:
virtqueue_add_split() only supports virtual addresses; the DMA mapping is
performed inside virtqueue_add_split() itself.
In some scenarios (such as the AF_XDP scenario), the memory is allocated
and DMA is completed in advance, so it is necessary for us to support
passing the DMA address to virtqueue_add_split().
Also record this predma information in extra->flags, so that the DMA unmap
can be skipped for such buffers.
Signed-off-by: Xuan Zhuo <xuanzhuo@xxxxxxxxxxxxxxxxx>
---
drivers/virtio/virtio_ring.c | 62 ++++++++++++++++++++++++++++--------
1 file changed, 48 insertions(+), 14 deletions(-)
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index cf9d118668f1..d32c0bf6016f 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -66,6 +66,9 @@
#define LAST_ADD_TIME_INVALID(vq)
#endif
+/* This means the buffer dma is pre-alloc. Just used by vring_desc_extra */
+#define VIRTIO_DESC_F_PREDMA (1 << 15)
I suggest using a new field in desc_extra to avoid conflicts with future
virtio extensions.
+
struct vring_desc_state_split {
void *data; /* Data for callback. */
struct vring_desc *indir_desc; /* Indirect descriptor, if any. */
@@ -387,7 +390,7 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq,
}
static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
- unsigned int i)
+ unsigned int i, bool predma)
{
struct vring_desc_extra *extra = vq->split.desc_extra;
u16 flags;
@@ -404,6 +407,9 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
(flags & VRING_DESC_F_WRITE) ?
DMA_FROM_DEVICE : DMA_TO_DEVICE);
} else {
+ if (predma)
+ goto out;
+
dma_unmap_page(vring_dma_dev(vq),
extra[i].addr,
extra[i].len,
@@ -474,7 +480,8 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
unsigned int in_sgs,
void *data,
void *ctx,
- gfp_t gfp)
+ gfp_t gfp,
+ bool predma)
{
struct vring_virtqueue *vq = to_vvq(_vq);
struct scatterlist *sg;
@@ -535,9 +542,16 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
for (n = 0; n < out_sgs; n++) {
for (sg = sgs[n]; sg; sg = sg_next(sg)) {
- dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
- if (vring_mapping_error(vq, addr))
- goto unmap_release;
+ dma_addr_t addr;
+
+ if (predma) {
+ addr = sg_dma_address(sg);
+
+ } else {
+ addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
+ if (vring_mapping_error(vq, addr))
+ goto unmap_release;
+ }
prev = i;
/* Note that we trust indirect descriptor
@@ -550,9 +564,16 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
}
for (; n < (out_sgs + in_sgs); n++) {
for (sg = sgs[n]; sg; sg = sg_next(sg)) {
- dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
- if (vring_mapping_error(vq, addr))
- goto unmap_release;
+ dma_addr_t addr;
+
+ if (predma) {
+ addr = sg_dma_address(sg);
+
+ } else {
+ addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
+ if (vring_mapping_error(vq, addr))
+ goto unmap_release;
+ }
prev = i;
/* Note that we trust indirect descriptor
@@ -602,6 +623,9 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
else
vq->split.desc_state[head].indir_desc = ctx;
+ if (predma)
+ vq->split.desc_extra[head].flags |= VIRTIO_DESC_F_PREDMA;
+
/* Put entry in available array (but don't update avail->idx until they
* do sync). */
avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
@@ -626,6 +650,9 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
return 0;
unmap_release:
+ if (predma)
+ goto skip_unmap;
+
Nit: we probably need a better name for the label. How about "unmap_free"?
Thanks
err_idx = i;
if (indirect)
@@ -640,9 +667,10 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
vring_unmap_one_split_indirect(vq, &desc[i]);
i = virtio16_to_cpu(_vq->vdev, desc[i].next);
} else
- i = vring_unmap_one_split(vq, i);
+ i = vring_unmap_one_split(vq, i, false);
}
+skip_unmap:
if (indirect)
kfree(desc);
@@ -686,20 +714,24 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
{
unsigned int i, j;
__virtio16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
+ bool predma = false;
/* Clear data ptr. */
vq->split.desc_state[head].data = NULL;
+ if (vq->split.desc_extra[head].flags & VIRTIO_DESC_F_PREDMA)
+ predma = true;
+
/* Put back on free list: unmap first-level descriptors and find end */
i = head;
while (vq->split.vring.desc[i].flags & nextflag) {
- vring_unmap_one_split(vq, i);
+ vring_unmap_one_split(vq, i, predma);
i = vq->split.desc_extra[i].next;
vq->vq.num_free++;
}
- vring_unmap_one_split(vq, i);
+ vring_unmap_one_split(vq, i, predma);
vq->split.desc_extra[i].next = vq->free_head;
vq->free_head = head;
@@ -721,8 +753,10 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
VRING_DESC_F_INDIRECT));
BUG_ON(len == 0 || len % sizeof(struct vring_desc));
- for (j = 0; j < len / sizeof(struct vring_desc); j++)
- vring_unmap_one_split_indirect(vq, &indir_desc[j]);
+ if (!predma) {
+ for (j = 0; j < len / sizeof(struct vring_desc); j++)
+ vring_unmap_one_split_indirect(vq, &indir_desc[j]);
+ }
kfree(indir_desc);
vq->split.desc_state[head].indir_desc = NULL;
@@ -1788,7 +1822,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
return vq->packed_ring ? virtqueue_add_packed(_vq, sgs, total_sg,
out_sgs, in_sgs, data, ctx, gfp) :
virtqueue_add_split(_vq, sgs, total_sg,
- out_sgs, in_sgs, data, ctx, gfp);
+ out_sgs, in_sgs, data, ctx, gfp, predma);
}
/**
_______________________________________________
Virtualization mailing list
Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx
https://lists.linuxfoundation.org/mailman/listinfo/virtualization