On Wed, Mar 27, 2024 at 7:14 PM Xuan Zhuo <xuanzhuo@xxxxxxxxxxxxxxxxx> wrote: > > In the functions vring_unmap_extra_packed and vring_unmap_desc_packed, > multiple checks are made whether unmap is performed and whether it is > INDIRECT. > > These two functions are usually called in a loop, and we should put the > check outside the loop. > > And we unmap the descs with VRING_DESC_F_INDIRECT on the same path with > other descs, that make the thing more complex. If we distinguish the > descs with VRING_DESC_F_INDIRECT before unmap, thing will be clearer. > > For desc with VRING_DESC_F_INDIRECT flag: > 1. only one desc of the desc table is used, we do not need the loop > Theoretically, indirect descriptors could be chained. > But now, that is not supported by "add", so we ignore this case. > 2. the called unmap api is difference from the other desc > 3. the vq->premapped is not needed to check > 4. the vq->indirect is not needed to check > 5. the state->indir_desc must not be null It doesn't explain the connection to the goal of this series. If it's not a must I'd suggest moving it to a separate patch. > > Signed-off-by: Xuan Zhuo <xuanzhuo@xxxxxxxxxxxxxxxxx> Rethinking this, it looks to me like it would complicate the code further. For example, the vring_map_xxx() helpers check premapped and use_dma_api by themselves. But in the case of vring_unmap() you want to move those checks to the caller. This will result in tricky code that is hard to understand. We need to be consistent here. If we try to optimize unmap we need to optimize map as well. But generally it would complicate the logic of the caller if we want to let the caller differentiate. Ideally, the callers of those functions should know nothing about use_dma_api, premapped and the like. 
> --- > drivers/virtio/virtio_ring.c | 78 ++++++++++++++++++------------------ > 1 file changed, 40 insertions(+), 38 deletions(-) > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c > index 03360073bd4a..a2838fe1cc08 100644 > --- a/drivers/virtio/virtio_ring.c > +++ b/drivers/virtio/virtio_ring.c > @@ -1214,6 +1214,7 @@ static u16 packed_last_used(u16 last_used_idx) > return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR)); > } > > +/* caller must check vring_need_unmap_buffer() */ > static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, > const struct vring_desc_extra *extra) > { > @@ -1221,33 +1222,18 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, > > flags = extra->flags; > > - if (flags & VRING_DESC_F_INDIRECT) { > - if (!vq->use_dma_api) > - return; > - > - dma_unmap_single(vring_dma_dev(vq), > - extra->addr, extra->len, > - (flags & VRING_DESC_F_WRITE) ? > - DMA_FROM_DEVICE : DMA_TO_DEVICE); > - } else { > - if (!vring_need_unmap_buffer(vq)) > - return; > - > - dma_unmap_page(vring_dma_dev(vq), > - extra->addr, extra->len, > - (flags & VRING_DESC_F_WRITE) ? > - DMA_FROM_DEVICE : DMA_TO_DEVICE); > - } > + dma_unmap_page(vring_dma_dev(vq), > + extra->addr, extra->len, > + (flags & VRING_DESC_F_WRITE) ? 
> + DMA_FROM_DEVICE : DMA_TO_DEVICE); > } > > +/* caller must check vring_need_unmap_buffer() */ > static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, > const struct vring_packed_desc *desc) > { > u16 flags; > > - if (!vring_need_unmap_buffer(vq)) > - return; > - > flags = le16_to_cpu(desc->flags); > > dma_unmap_page(vring_dma_dev(vq), > @@ -1323,7 +1309,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, > total_sg * sizeof(struct vring_packed_desc), > DMA_TO_DEVICE); > if (vring_mapping_error(vq, addr)) { > - if (vq->premapped) > + if (!vring_need_unmap_buffer(vq)) > goto free_desc; I would do this to make it much easier to read and to avoid the warning: if (vring_mapping_error(vq, addr)) goto unmap_release; unmap_release: if (vring_need_unmap_buffer(vq)) for (i = 0, xxx) free_desc: kfree(desc); or it could be unmap_release: if (!vring_need_unmap_buffer(vq)) goto free_desc; Still tricky but better. > > goto unmap_release; > @@ -1338,10 +1324,11 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, > vq->packed.desc_extra[id].addr = addr; > vq->packed.desc_extra[id].len = total_sg * > sizeof(struct vring_packed_desc); > - vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | > - vq->packed.avail_used_flags; > } > > + vq->packed.desc_extra[id].flags = VRING_DESC_F_INDIRECT | > + vq->packed.avail_used_flags; An example of the tricky code: I think you do this because you want to distinguish indirect descriptors in detach_buf_packed(): flags = vq->packed.desc_extra[id].flags; > + > /* > * A driver MUST NOT make the first descriptor in the list > * available before all subsequent descriptors comprising > @@ -1382,6 +1369,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, > unmap_release: > err_idx = i; > > + WARN_ON(!vring_need_unmap_buffer(vq)); > + > for (i = 0; i < err_idx; i++) > vring_unmap_desc_packed(vq, &desc[i]); > > @@ -1475,12 +1464,13 @@ static inline int virtqueue_add_packed(struct 
virtqueue *_vq, > desc[i].len = cpu_to_le32(sg->length); > desc[i].id = cpu_to_le16(id); > > - if (unlikely(vq->use_dma_api)) { > + if (vring_need_unmap_buffer(vq)) { > vq->packed.desc_extra[curr].addr = addr; > vq->packed.desc_extra[curr].len = sg->length; > - vq->packed.desc_extra[curr].flags = > - le16_to_cpu(flags); > } > + > + vq->packed.desc_extra[curr].flags = le16_to_cpu(flags); > + > prev = curr; > curr = vq->packed.desc_extra[curr].next; > > @@ -1530,6 +1520,8 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, > > vq->packed.avail_used_flags = avail_used_flags; > > + WARN_ON(!vring_need_unmap_buffer(vq)); > + > for (n = 0; n < total_sg; n++) { > if (i == err_idx) > break; > @@ -1599,7 +1591,9 @@ static void detach_buf_packed(struct vring_virtqueue *vq, > struct vring_desc_state_packed *state = NULL; > struct vring_packed_desc *desc; > unsigned int i, curr; > + u16 flags; > > + flags = vq->packed.desc_extra[id].flags; Can we check vq->indirect && indir_desc here? Then we don't need special care to store flags in desc_extra. > state = &vq->packed.desc_state[id]; > > /* Clear data ptr. 
*/ > @@ -1609,22 +1603,32 @@ static void detach_buf_packed(struct vring_virtqueue *vq, > vq->free_head = id; > vq->vq.num_free += state->num; > > - if (unlikely(vq->use_dma_api)) { > - curr = id; > - for (i = 0; i < state->num; i++) { > - vring_unmap_extra_packed(vq, > - &vq->packed.desc_extra[curr]); > - curr = vq->packed.desc_extra[curr].next; > + if (!(flags & VRING_DESC_F_INDIRECT)) { > + if (vring_need_unmap_buffer(vq)) { > + curr = id; > + for (i = 0; i < state->num; i++) { > + vring_unmap_extra_packed(vq, > + &vq->packed.desc_extra[curr]); > + curr = vq->packed.desc_extra[curr].next; > + } > } > - } > > - if (vq->indirect) { > + if (ctx) > + *ctx = state->indir_desc; > + } else { > + const struct vring_desc_extra *extra; > u32 len; > > + if (vq->use_dma_api) { > + extra = &vq->packed.desc_extra[id]; > + dma_unmap_single(vring_dma_dev(vq), > + extra->addr, extra->len, > + (flags & VRING_DESC_F_WRITE) ? > + DMA_FROM_DEVICE : DMA_TO_DEVICE); > + } > + > /* Free the indirect table, if any, now that it's unmapped. */ > desc = state->indir_desc; > - if (!desc) > - return; > > if (vring_need_unmap_buffer(vq)) { > len = vq->packed.desc_extra[id].len; > @@ -1634,8 +1638,6 @@ static void detach_buf_packed(struct vring_virtqueue *vq, > } > kfree(desc); > state->indir_desc = NULL; > - } else if (ctx) { > - *ctx = state->indir_desc; > } > } > > -- > 2.32.0.3.g01195cf9f > Thanks