On Wed, Oct 20, 2021 at 2:52 PM Eugenio Perez Martin <eperezma@xxxxxxxxxx> wrote: > > On Wed, Oct 20, 2021 at 4:07 AM Jason Wang <jasowang@xxxxxxxxxx> wrote: > > > > On Wed, Oct 20, 2021 at 10:02 AM Jason Wang <jasowang@xxxxxxxxxx> wrote: > > > > > > On Tue, Oct 19, 2021 at 6:29 PM Eugenio Perez Martin > > > <eperezma@xxxxxxxxxx> wrote: > > > > > > > > On Tue, Oct 19, 2021 at 11:25 AM Jason Wang <jasowang@xxxxxxxxxx> wrote: > > > > > > > > > > > > > > > 在 2021/10/1 下午3:06, Eugenio Pérez 写道: > > > > > > Use translations added in VhostIOVATree in SVQ. > > > > > > > > > > > > Now every element needs to store the previous address also, so VirtQueue > > > > > > can consume the elements properly. This adds a little overhead per VQ > > > > > > element, having to allocate more memory to stash them. As a possible > > > > > > optimization, this allocation could be avoided if the descriptor is not > > > > > > a chain but a single one, but this is left undone. > > > > > > > > > > > > TODO: iova range should be queried before, and add logic to fail when > > > > > > GPA is outside of its range and memory listener or svq add it. > > > > > > > > > > > > Signed-off-by: Eugenio Pérez <eperezma@xxxxxxxxxx> > > > > > > --- > > > > > > hw/virtio/vhost-shadow-virtqueue.h | 4 +- > > > > > > hw/virtio/vhost-shadow-virtqueue.c | 130 ++++++++++++++++++++++++----- > > > > > > hw/virtio/vhost-vdpa.c | 40 ++++++++- > > > > > > hw/virtio/trace-events | 1 + > > > > > > 4 files changed, 152 insertions(+), 23 deletions(-) > > > > > > > > > > > > diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h > > > > > > index b7baa424a7..a0e6b5267a 100644 > > > > > > --- a/hw/virtio/vhost-shadow-virtqueue.h > > > > > > +++ b/hw/virtio/vhost-shadow-virtqueue.h > > > > > > @@ -11,6 +11,7 @@ > > > > > > #define VHOST_SHADOW_VIRTQUEUE_H > > > > > > > > > > > > #include "hw/virtio/vhost.h" > > > > > > +#include "hw/virtio/vhost-iova-tree.h" > > > > > > > > > > > > typedef struct VhostShadowVirtqueue VhostShadowVirtqueue; > > > > > > > > > > > > @@ -28,7 +29,8 @@ bool vhost_svq_start(struct vhost_dev *dev, unsigned idx, > > > > > > void vhost_svq_stop(struct vhost_dev *dev, unsigned idx, > > > > > > VhostShadowVirtqueue *svq); > > > > > > > > > > > > -VhostShadowVirtqueue *vhost_svq_new(struct vhost_dev *dev, int idx); > > > > > > +VhostShadowVirtqueue *vhost_svq_new(struct vhost_dev *dev, int idx, > > > > > > + VhostIOVATree *iova_map); > > > > > > > > > > > > void vhost_svq_free(VhostShadowVirtqueue *vq); > > > > > > > > > > > > diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c > > > > > > index 2fd0bab75d..9db538547e 100644 > > > > > > --- a/hw/virtio/vhost-shadow-virtqueue.c > > > > > > +++ b/hw/virtio/vhost-shadow-virtqueue.c > > > > > > @@ -11,12 +11,19 @@ > > > > > > #include "hw/virtio/vhost-shadow-virtqueue.h" > > > > > > #include "hw/virtio/vhost.h" > > > > > > #include "hw/virtio/virtio-access.h" > > > > > > +#include "hw/virtio/vhost-iova-tree.h" > > > > > > > > > > > > #include "standard-headers/linux/vhost_types.h" > > > > > > > > > > > > #include "qemu/error-report.h" > > > > > > #include "qemu/main-loop.h" > > > > > > > > > > > > +typedef struct SVQElement { > > > > > > + VirtQueueElement elem; > > > > > > + void **in_sg_stash; > > > > > > + void **out_sg_stash; > > > > > > +} SVQElement; > > > > > > + > > > > > > /* Shadow virtqueue to relay notifications */ > > > > > > typedef struct VhostShadowVirtqueue { > > > > > > /* Shadow vring */ > > > > > > @@ -46,8 +53,11 @@ typedef struct VhostShadowVirtqueue { > > > > > > /* Virtio device */ > > > > > > VirtIODevice *vdev; > > > > > > > > > > > > + /* IOVA mapping if used */ > > > > > > + VhostIOVATree *iova_map; > > > > > > + > > > > > > /* Map for returning guest's descriptors */ > > > > > > - VirtQueueElement **ring_id_maps; > > > > > > + SVQElement **ring_id_maps; > > > > > > > > > > > > /* Next head to expose to device */ > > > > > > uint16_t avail_idx_shadow; > > > > > > @@ -79,13 +89,6 @@ bool vhost_svq_valid_device_features(uint64_t *dev_features) > > > > > > continue; > > > > > > > > > > > > case VIRTIO_F_ACCESS_PLATFORM: > > > > > > - /* SVQ needs this feature disabled. Can't continue */ > > > > > > - if (*dev_features & BIT_ULL(b)) { > > > > > > - clear_bit(b, dev_features); > > > > > > - r = false; > > > > > > - } > > > > > > - break; > > > > > > - > > > > > > case VIRTIO_F_VERSION_1: > > > > > > /* SVQ needs this feature, so can't continue */ > > > > > > if (!(*dev_features & BIT_ULL(b))) { > > > > > > @@ -126,6 +129,64 @@ static void vhost_svq_set_notification(VhostShadowVirtqueue *svq, bool enable) > > > > > > } > > > > > > } > > > > > > > > > > > > +static void vhost_svq_stash_addr(void ***stash, const struct iovec *iov, > > > > > > + size_t num) > > > > > > +{ > > > > > > + size_t i; > > > > > > + > > > > > > + if (num == 0) { > > > > > > + return; > > > > > > + } > > > > > > + > > > > > > + *stash = g_new(void *, num); > > > > > > + for (i = 0; i < num; ++i) { > > > > > > + (*stash)[i] = iov[i].iov_base; > > > > > > + } > > > > > > +} > > > > > > + > > > > > > +static void vhost_svq_unstash_addr(void **stash, struct iovec *iov, size_t num) > > > > > > +{ > > > > > > + size_t i; > > > > > > + > > > > > > + if (num == 0) { > > > > > > + return; > > > > > > + } > > > > > > + > > > > > > + for (i = 0; i < num; ++i) { > > > > > > + iov[i].iov_base = stash[i]; > > > > > > + } > > > > > > + g_free(stash); > > > > > > +} > > > > > > + > > > > > > +static void vhost_svq_translate_addr(const VhostShadowVirtqueue *svq, > > > > > > + struct iovec *iovec, size_t num) > > > > > > +{ > > > > > > + size_t i; > > > > > > + > > > > > > + for (i = 0; i < num; ++i) { > > > > > > + VhostDMAMap needle = { > > > > > > + .translated_addr = iovec[i].iov_base, > > > > > > + .size = iovec[i].iov_len, > > > > > > + }; > > > > > > + size_t off; > > > > > > + > > > > > > + const VhostDMAMap *map = vhost_iova_tree_find_iova(svq->iova_map, > > > > > > + &needle); > > > > > > > > > > > > > > > Is it possible that we end up with more than one maps here? > > > > > > > > > > > > > Actually it is possible, since there is no guarantee that one > > > > descriptor (or indirect descriptor) maps exactly to one iov. It could > > > > map to many if qemu vaddr is not contiguous but GPA + size is. This is > > > > something that must be fixed for the next revision, so thanks for > > > > pointing it out! > > > > > > > > Taking that into account, the condition that svq vring avail_idx - > > > > used_idx was always less or equal than guest's vring avail_idx - > > > > used_idx is not true anymore. Checking for that before adding buffers > > > > to SVQ is the easy part, but how could we recover in that case? > > > > > > > > I think that the easy solution is to check for more available buffers > > > > unconditionally at the end of vhost_svq_handle_call, which handles the > > > > SVQ used and is supposed to make more room for available buffers. So > > > > vhost_handle_guest_kick would not check if eventfd is set or not > > > > anymore. > > > > > > > > Would that make sense? > > > > > > Yes, I think it should work. > > > > Btw, I wonder how to handle indirect descriptors. SVQ doesn't use > > indirect descriptors for now, but it looks like a must otherwise we > > may end up SVQ is full before VQ. > > > > We can get to that situation without indirect too, if a single > descriptor maps to more than one sg buffer. The next revision is going > to control that too. > > > It looks to me an easy way is to always use indirect descriptors if #sg >= 2? > > > > I will use that, but that does not solve the case where a descriptor > maps to > 1 different buffers in qemu vaddr. Right, so we need to deal with the case when SVQ is out of space. > So I think that some > check after marking descriptors as used is a must somehow. I thought it should be before processing the available buffer? It's the guest driver that make sure there's sufficient space for used ring? Thanks > > > > Thanks > > > > > > > > Thanks > > > > > > > > > > > Thanks! > > > > > > > > > > > > > > > + /* > > > > > > + * Map cannot be NULL since iova map contains all guest space and > > > > > > + * qemu already has a physical address mapped > > > > > > + */ > > > > > > + assert(map); > > > > > > + > > > > > > + /* > > > > > > + * Map->iova chunk size is ignored. What to do if descriptor > > > > > > + * (addr, size) does not fit is delegated to the device. > > > > > > + */ > > > > > > + off = needle.translated_addr - map->translated_addr; > > > > > > + iovec[i].iov_base = (void *)(map->iova + off); > > > > > > + } > > > > > > +} > > > > > > + > > > > > > static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, > > > > > > const struct iovec *iovec, > > > > > > size_t num, bool more_descs, bool write) > > > > > > @@ -156,8 +217,9 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, > > > > > > } > > > > > > > > > > > > static unsigned vhost_svq_add_split(VhostShadowVirtqueue *svq, > > > > > > - VirtQueueElement *elem) > > > > > > + SVQElement *svq_elem) > > > > > > { > > > > > > + VirtQueueElement *elem = &svq_elem->elem; > > > > > > int head; > > > > > > unsigned avail_idx; > > > > > > vring_avail_t *avail = svq->vring.avail; > > > > > > @@ -167,6 +229,12 @@ static unsigned vhost_svq_add_split(VhostShadowVirtqueue *svq, > > > > > > /* We need some descriptors here */ > > > > > > assert(elem->out_num || elem->in_num); > > > > > > > > > > > > + vhost_svq_stash_addr(&svq_elem->in_sg_stash, elem->in_sg, elem->in_num); > > > > > > + vhost_svq_stash_addr(&svq_elem->out_sg_stash, elem->out_sg, elem->out_num); > > > > > > > > > > > > > > > I wonder if we can solve the trick like stash and unstash with a > > > > > dedicated sgs in svq_elem, instead of reusing the elem. > > > > > > > > > > > > > Actually yes, it would be way simpler to use a new sgs array in > > > > svq_elem. I will change that. > > > > > > > > Thanks! > > > > > > > > > Thanks > > > > > > > > > > > > > > > > + > > > > > > + vhost_svq_translate_addr(svq, elem->in_sg, elem->in_num); > > > > > > + vhost_svq_translate_addr(svq, elem->out_sg, elem->out_num); > > > > > > + > > > > > > vhost_vring_write_descs(svq, elem->out_sg, elem->out_num, > > > > > > elem->in_num > 0, false); > > > > > > vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true); > > > > > > @@ -187,7 +255,7 @@ static unsigned vhost_svq_add_split(VhostShadowVirtqueue *svq, > > > > > > > > > > > > } > > > > > > > > > > > > -static void vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) > > > > > > +static void vhost_svq_add(VhostShadowVirtqueue *svq, SVQElement *elem) > > > > > > { > > > > > > unsigned qemu_head = vhost_svq_add_split(svq, elem); > > > > > > > > > > > > @@ -221,7 +289,7 @@ static void vhost_handle_guest_kick(EventNotifier *n) > > > > > > } > > > > > > > > > > > > while (true) { > > > > > > - VirtQueueElement *elem = virtqueue_pop(svq->vq, sizeof(*elem)); > > > > > > + SVQElement *elem = virtqueue_pop(svq->vq, sizeof(*elem)); > > > > > > if (!elem) { > > > > > > break; > > > > > > } > > > > > > @@ -247,7 +315,7 @@ static bool vhost_svq_more_used(VhostShadowVirtqueue *svq) > > > > > > return svq->used_idx != svq->shadow_used_idx; > > > > > > } > > > > > > > > > > > > -static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq) > > > > > > +static SVQElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq) > > > > > > { > > > > > > vring_desc_t *descs = svq->vring.desc; > > > > > > const vring_used_t *used = svq->vring.used; > > > > > > @@ -279,7 +347,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq) > > > > > > descs[used_elem.id].next = svq->free_head; > > > > > > svq->free_head = used_elem.id; > > > > > > > > > > > > - svq->ring_id_maps[used_elem.id]->len = used_elem.len; > > > > > > + svq->ring_id_maps[used_elem.id]->elem.len = used_elem.len; > > > > > > return g_steal_pointer(&svq->ring_id_maps[used_elem.id]); > > > > > > } > > > > > > > > > > > > @@ -296,12 +364,19 @@ static void vhost_svq_handle_call_no_test(EventNotifier *n) > > > > > > > > > > > > vhost_svq_set_notification(svq, false); > > > > > > while (true) { > > > > > > - g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq); > > > > > > - if (!elem) { > > > > > > + g_autofree SVQElement *svq_elem = vhost_svq_get_buf(svq); > > > > > > + VirtQueueElement *elem; > > > > > > + if (!svq_elem) { > > > > > > break; > > > > > > } > > > > > > > > > > > > assert(i < svq->vring.num); > > > > > > + elem = &svq_elem->elem; > > > > > > + > > > > > > + vhost_svq_unstash_addr(svq_elem->in_sg_stash, elem->in_sg, > > > > > > + elem->in_num); > > > > > > + vhost_svq_unstash_addr(svq_elem->out_sg_stash, elem->out_sg, > > > > > > + elem->out_num); > > > > > > virtqueue_fill(vq, elem, elem->len, i++); > > > > > > } > > > > > > > > > > > > @@ -451,14 +526,24 @@ void vhost_svq_stop(struct vhost_dev *dev, unsigned idx, > > > > > > event_notifier_set_handler(&svq->host_notifier, NULL); > > > > > > > > > > > > for (i = 0; i < svq->vring.num; ++i) { > > > > > > - g_autofree VirtQueueElement *elem = svq->ring_id_maps[i]; > > > > > > + g_autofree SVQElement *svq_elem = svq->ring_id_maps[i]; > > > > > > + VirtQueueElement *elem; > > > > > > + > > > > > > + if (!svq_elem) { > > > > > > + continue; > > > > > > + } > > > > > > + > > > > > > + elem = &svq_elem->elem; > > > > > > + vhost_svq_unstash_addr(svq_elem->in_sg_stash, elem->in_sg, > > > > > > + elem->in_num); > > > > > > + vhost_svq_unstash_addr(svq_elem->out_sg_stash, elem->out_sg, > > > > > > + elem->out_num); > > > > > > + > > > > > > /* > > > > > > * Although the doc says we must unpop in order, it's ok to unpop > > > > > > * everything. > > > > > > */ > > > > > > - if (elem) { > > > > > > - virtqueue_unpop(svq->vq, elem, elem->len); > > > > > > - } > > > > > > + virtqueue_unpop(svq->vq, elem, elem->len); > > > > > > } > > > > > > } > > > > > > > > > > > > @@ -466,7 +551,8 @@ void vhost_svq_stop(struct vhost_dev *dev, unsigned idx, > > > > > > * Creates vhost shadow virtqueue, and instruct vhost device to use the shadow > > > > > > * methods and file descriptors. > > > > > > */ > > > > > > -VhostShadowVirtqueue *vhost_svq_new(struct vhost_dev *dev, int idx) > > > > > > +VhostShadowVirtqueue *vhost_svq_new(struct vhost_dev *dev, int idx, > > > > > > + VhostIOVATree *iova_map) > > > > > > { > > > > > > int vq_idx = dev->vq_index + idx; > > > > > > unsigned num = virtio_queue_get_num(dev->vdev, vq_idx); > > > > > > @@ -500,11 +586,13 @@ VhostShadowVirtqueue *vhost_svq_new(struct vhost_dev *dev, int idx) > > > > > > memset(svq->vring.desc, 0, driver_size); > > > > > > svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size); > > > > > > memset(svq->vring.used, 0, device_size); > > > > > > + svq->iova_map = iova_map; > > > > > > + > > > > > > for (i = 0; i < num - 1; i++) { > > > > > > svq->vring.desc[i].next = cpu_to_le16(i + 1); > > > > > > } > > > > > > > > > > > > - svq->ring_id_maps = g_new0(VirtQueueElement *, num); > > > > > > + svq->ring_id_maps = g_new0(SVQElement *, num); > > > > > > event_notifier_set_handler(&svq->call_notifier, > > > > > > vhost_svq_handle_call); > > > > > > return g_steal_pointer(&svq); > > > > > > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c > > > > > > index a9c680b487..f5a12fee9d 100644 > > > > > > --- a/hw/virtio/vhost-vdpa.c > > > > > > +++ b/hw/virtio/vhost-vdpa.c > > > > > > @@ -176,6 +176,18 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, > > > > > > vaddr, section->readonly); > > > > > > > > > > > > llsize = int128_sub(llend, int128_make64(iova)); > > > > > > + if (v->shadow_vqs_enabled) { > > > > > > + VhostDMAMap mem_region = { > > > > > > + .translated_addr = vaddr, > > > > > > + .size = int128_get64(llsize) - 1, > > > > > > + .perm = IOMMU_ACCESS_FLAG(true, section->readonly), > > > > > > + }; > > > > > > + > > > > > > + int r = vhost_iova_tree_alloc(v->iova_map, &mem_region); > > > > > > + assert(r == VHOST_DMA_MAP_OK); > > > > > > + > > > > > > + iova = mem_region.iova; > > > > > > + } > > > > > > > > > > > > ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize), > > > > > > vaddr, section->readonly); > > > > > > @@ -754,6 +766,23 @@ static bool vhost_vdpa_force_iommu(struct vhost_dev *dev) > > > > > > return true; > > > > > > } > > > > > > > > > > > > +static int vhost_vdpa_get_iova_range(struct vhost_dev *dev, > > > > > > + hwaddr *first, hwaddr *last) > > > > > > +{ > > > > > > + int ret; > > > > > > + struct vhost_vdpa_iova_range range; > > > > > > + > > > > > > + ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_IOVA_RANGE, &range); > > > > > > + if (ret != 0) { > > > > > > + return ret; > > > > > > + } > > > > > > + > > > > > > + *first = range.first; > > > > > > + *last = range.last; > > > > > > + trace_vhost_vdpa_get_iova_range(dev, *first, *last); > > > > > > + return ret; > > > > > > +} > > > > > > + > > > > > > /** > > > > > > * Maps QEMU vaddr memory to device in a suitable way for shadow virtqueue: > > > > > > * - It always reference qemu memory address, not guest's memory. > > > > > > @@ -881,6 +910,7 @@ static bool vhost_vdpa_svq_start_vq(struct vhost_dev *dev, unsigned idx) > > > > > > static unsigned vhost_vdpa_enable_svq(struct vhost_vdpa *v, bool enable) > > > > > > { > > > > > > struct vhost_dev *hdev = v->dev; > > > > > > + hwaddr iova_first, iova_last; > > > > > > unsigned n; > > > > > > int r; > > > > > > > > > > > > @@ -894,7 +924,7 @@ static unsigned vhost_vdpa_enable_svq(struct vhost_vdpa *v, bool enable) > > > > > > /* Allocate resources */ > > > > > > assert(v->shadow_vqs->len == 0); > > > > > > for (n = 0; n < hdev->nvqs; ++n) { > > > > > > - VhostShadowVirtqueue *svq = vhost_svq_new(hdev, n); > > > > > > + VhostShadowVirtqueue *svq = vhost_svq_new(hdev, n, v->iova_map); > > > > > > if (unlikely(!svq)) { > > > > > > g_ptr_array_set_size(v->shadow_vqs, 0); > > > > > > return 0; > > > > > > @@ -903,6 +933,8 @@ static unsigned vhost_vdpa_enable_svq(struct vhost_vdpa *v, bool enable) > > > > > > } > > > > > > } > > > > > > > > > > > > + r = vhost_vdpa_get_iova_range(hdev, &iova_first, &iova_last); > > > > > > + assert(r == 0); > > > > > > r = vhost_vdpa_vring_pause(hdev); > > > > > > assert(r == 0); > > > > > > > > > > > > @@ -913,6 +945,12 @@ static unsigned vhost_vdpa_enable_svq(struct vhost_vdpa *v, bool enable) > > > > > > } > > > > > > } > > > > > > > > > > > > + memory_listener_unregister(&v->listener); > > > > > > + if (vhost_vdpa_dma_unmap(v, iova_first, > > > > > > + (iova_last - iova_first) & TARGET_PAGE_MASK)) { > > > > > > + error_report("Fail to invalidate device iotlb"); > > > > > > + } > > > > > > + > > > > > > /* Reset device so it can be configured */ > > > > > > r = vhost_vdpa_dev_start(hdev, false); > > > > > > assert(r == 0); > > > > > > diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events > > > > > > index 8ed19e9d0c..650e521e35 100644 > > > > > > --- a/hw/virtio/trace-events > > > > > > +++ b/hw/virtio/trace-events > > > > > > @@ -52,6 +52,7 @@ vhost_vdpa_set_vring_call(void *dev, unsigned int index, int fd) "dev: %p index: > > > > > > vhost_vdpa_get_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRIx64 > > > > > > vhost_vdpa_set_owner(void *dev) "dev: %p" > > > > > > vhost_vdpa_vq_get_addr(void *dev, void *vq, uint64_t desc_user_addr, uint64_t avail_user_addr, uint64_t used_user_addr) "dev: %p vq: %p desc_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64 > > > > > > +vhost_vdpa_get_iova_range(void *dev, uint64_t first, uint64_t last) "dev: %p first: 0x%"PRIx64" last: 0x%"PRIx64 > > > > > > > > > > > > # virtio.c > > > > > > virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u" > > > > > > > > > > > > _______________________________________________ Virtualization mailing list Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linuxfoundation.org/mailman/listinfo/virtualization