Re: [RFC PATCH v4 20/20] vdpa: Add custom IOTLB translations to SVQ

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Oct 20, 2021 at 7:57 PM Eugenio Perez Martin
<eperezma@xxxxxxxxxx> wrote:
>
> On Wed, Oct 20, 2021 at 11:03 AM Jason Wang <jasowang@xxxxxxxxxx> wrote:
> >
> > On Wed, Oct 20, 2021 at 2:52 PM Eugenio Perez Martin
> > <eperezma@xxxxxxxxxx> wrote:
> > >
> > > On Wed, Oct 20, 2021 at 4:07 AM Jason Wang <jasowang@xxxxxxxxxx> wrote:
> > > >
> > > > On Wed, Oct 20, 2021 at 10:02 AM Jason Wang <jasowang@xxxxxxxxxx> wrote:
> > > > >
> > > > > On Tue, Oct 19, 2021 at 6:29 PM Eugenio Perez Martin
> > > > > <eperezma@xxxxxxxxxx> wrote:
> > > > > >
> > > > > > On Tue, Oct 19, 2021 at 11:25 AM Jason Wang <jasowang@xxxxxxxxxx> wrote:
> > > > > > >
> > > > > > >
> > > > > > > 在 2021/10/1 下午3:06, Eugenio Pérez 写道:
> > > > > > > > Use translations added in VhostIOVATree in SVQ.
> > > > > > > >
> > > > > > > > Now every element needs to store the previous address also, so VirtQueue
> > > > > > > > can consume the elements properly. This adds a little overhead per VQ
> > > > > > > > element, having to allocate more memory to stash them. As a possible
> > > > > > > > optimization, this allocation could be avoided if the descriptor is not
> > > > > > > > a chain but a single one, but this is left undone.
> > > > > > > >
> > > > > > > > TODO: iova range should be queried before, and add logic to fail when
> > > > > > > > GPA is outside of its range and memory listener or svq add it.
> > > > > > > >
> > > > > > > > Signed-off-by: Eugenio Pérez <eperezma@xxxxxxxxxx>
> > > > > > > > ---
> > > > > > > >   hw/virtio/vhost-shadow-virtqueue.h |   4 +-
> > > > > > > >   hw/virtio/vhost-shadow-virtqueue.c | 130 ++++++++++++++++++++++++-----
> > > > > > > >   hw/virtio/vhost-vdpa.c             |  40 ++++++++-
> > > > > > > >   hw/virtio/trace-events             |   1 +
> > > > > > > >   4 files changed, 152 insertions(+), 23 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
> > > > > > > > index b7baa424a7..a0e6b5267a 100644
> > > > > > > > --- a/hw/virtio/vhost-shadow-virtqueue.h
> > > > > > > > +++ b/hw/virtio/vhost-shadow-virtqueue.h
> > > > > > > > @@ -11,6 +11,7 @@
> > > > > > > >   #define VHOST_SHADOW_VIRTQUEUE_H
> > > > > > > >
> > > > > > > >   #include "hw/virtio/vhost.h"
> > > > > > > > +#include "hw/virtio/vhost-iova-tree.h"
> > > > > > > >
> > > > > > > >   typedef struct VhostShadowVirtqueue VhostShadowVirtqueue;
> > > > > > > >
> > > > > > > > @@ -28,7 +29,8 @@ bool vhost_svq_start(struct vhost_dev *dev, unsigned idx,
> > > > > > > >   void vhost_svq_stop(struct vhost_dev *dev, unsigned idx,
> > > > > > > >                       VhostShadowVirtqueue *svq);
> > > > > > > >
> > > > > > > > -VhostShadowVirtqueue *vhost_svq_new(struct vhost_dev *dev, int idx);
> > > > > > > > +VhostShadowVirtqueue *vhost_svq_new(struct vhost_dev *dev, int idx,
> > > > > > > > +                                    VhostIOVATree *iova_map);
> > > > > > > >
> > > > > > > >   void vhost_svq_free(VhostShadowVirtqueue *vq);
> > > > > > > >
> > > > > > > > diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
> > > > > > > > index 2fd0bab75d..9db538547e 100644
> > > > > > > > --- a/hw/virtio/vhost-shadow-virtqueue.c
> > > > > > > > +++ b/hw/virtio/vhost-shadow-virtqueue.c
> > > > > > > > @@ -11,12 +11,19 @@
> > > > > > > >   #include "hw/virtio/vhost-shadow-virtqueue.h"
> > > > > > > >   #include "hw/virtio/vhost.h"
> > > > > > > >   #include "hw/virtio/virtio-access.h"
> > > > > > > > +#include "hw/virtio/vhost-iova-tree.h"
> > > > > > > >
> > > > > > > >   #include "standard-headers/linux/vhost_types.h"
> > > > > > > >
> > > > > > > >   #include "qemu/error-report.h"
> > > > > > > >   #include "qemu/main-loop.h"
> > > > > > > >
> > > > > > > > +typedef struct SVQElement {
> > > > > > > > +    VirtQueueElement elem;
> > > > > > > > +    void **in_sg_stash;
> > > > > > > > +    void **out_sg_stash;
> > > > > > > > +} SVQElement;
> > > > > > > > +
> > > > > > > >   /* Shadow virtqueue to relay notifications */
> > > > > > > >   typedef struct VhostShadowVirtqueue {
> > > > > > > >       /* Shadow vring */
> > > > > > > > @@ -46,8 +53,11 @@ typedef struct VhostShadowVirtqueue {
> > > > > > > >       /* Virtio device */
> > > > > > > >       VirtIODevice *vdev;
> > > > > > > >
> > > > > > > > +    /* IOVA mapping if used */
> > > > > > > > +    VhostIOVATree *iova_map;
> > > > > > > > +
> > > > > > > >       /* Map for returning guest's descriptors */
> > > > > > > > -    VirtQueueElement **ring_id_maps;
> > > > > > > > +    SVQElement **ring_id_maps;
> > > > > > > >
> > > > > > > >       /* Next head to expose to device */
> > > > > > > >       uint16_t avail_idx_shadow;
> > > > > > > > @@ -79,13 +89,6 @@ bool vhost_svq_valid_device_features(uint64_t *dev_features)
> > > > > > > >               continue;
> > > > > > > >
> > > > > > > >           case VIRTIO_F_ACCESS_PLATFORM:
> > > > > > > > -            /* SVQ needs this feature disabled. Can't continue */
> > > > > > > > -            if (*dev_features & BIT_ULL(b)) {
> > > > > > > > -                clear_bit(b, dev_features);
> > > > > > > > -                r = false;
> > > > > > > > -            }
> > > > > > > > -            break;
> > > > > > > > -
> > > > > > > >           case VIRTIO_F_VERSION_1:
> > > > > > > >               /* SVQ needs this feature, so can't continue */
> > > > > > > >               if (!(*dev_features & BIT_ULL(b))) {
> > > > > > > > @@ -126,6 +129,64 @@ static void vhost_svq_set_notification(VhostShadowVirtqueue *svq, bool enable)
> > > > > > > >       }
> > > > > > > >   }
> > > > > > > >
> > > > > > > > +static void vhost_svq_stash_addr(void ***stash, const struct iovec *iov,
> > > > > > > > +                                 size_t num)
> > > > > > > > +{
> > > > > > > > +    size_t i;
> > > > > > > > +
> > > > > > > > +    if (num == 0) {
> > > > > > > > +        return;
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    *stash = g_new(void *, num);
> > > > > > > > +    for (i = 0; i < num; ++i) {
> > > > > > > > +        (*stash)[i] = iov[i].iov_base;
> > > > > > > > +    }
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static void vhost_svq_unstash_addr(void **stash, struct iovec *iov, size_t num)
> > > > > > > > +{
> > > > > > > > +    size_t i;
> > > > > > > > +
> > > > > > > > +    if (num == 0) {
> > > > > > > > +        return;
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    for (i = 0; i < num; ++i) {
> > > > > > > > +        iov[i].iov_base = stash[i];
> > > > > > > > +    }
> > > > > > > > +    g_free(stash);
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static void vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
> > > > > > > > +                                     struct iovec *iovec, size_t num)
> > > > > > > > +{
> > > > > > > > +    size_t i;
> > > > > > > > +
> > > > > > > > +    for (i = 0; i < num; ++i) {
> > > > > > > > +        VhostDMAMap needle = {
> > > > > > > > +            .translated_addr = iovec[i].iov_base,
> > > > > > > > +            .size = iovec[i].iov_len,
> > > > > > > > +        };
> > > > > > > > +        size_t off;
> > > > > > > > +
> > > > > > > > +        const VhostDMAMap *map = vhost_iova_tree_find_iova(svq->iova_map,
> > > > > > > > +                                                           &needle);
> > > > > > >
> > > > > > >
> > > > > > > > Is it possible that we end up with more than one map here?
> > > > > > >
> > > > > >
> > > > > > Actually it is possible, since there is no guarantee that one
> > > > > > descriptor (or indirect descriptor) maps exactly to one iov. It could
> > > > > > map to many if qemu vaddr is not contiguous but GPA + size is. This is
> > > > > > something that must be fixed for the next revision, so thanks for
> > > > > > pointing it out!
> > > > > >
> > > > > > Taking that into account, the condition that svq vring avail_idx -
> > > > > > used_idx is always less than or equal to the guest's vring avail_idx -
> > > > > > used_idx no longer holds. Checking for that before adding buffers
> > > > > > to SVQ is the easy part, but how could we recover in that case?
> > > > > >
> > > > > > I think that the easy solution is to check for more available buffers
> > > > > > unconditionally at the end of vhost_svq_handle_call, which handles the
> > > > > > SVQ used and is supposed to make more room for available buffers. So
> > > > > > vhost_handle_guest_kick would not check if eventfd is set or not
> > > > > > anymore.
> > > > > >
> > > > > > Would that make sense?
> > > > >
> > > > > Yes, I think it should work.
> > > >
> > > > Btw, I wonder how to handle indirect descriptors. SVQ doesn't use
> > > > indirect descriptors for now, but it looks like a must; otherwise we
> > > > may end up with the SVQ being full before the VQ.
> > > >
> > >
> > > We can get to that situation without indirect too, if a single
> > > descriptor maps to more than one sg buffer. The next revision is going
> > > to control that too.
> > >
> > > > It looks to me like an easy way is to always use indirect descriptors if #sg >= 2?
> > > >
> > >
> > > I will use that, but that does not solve the case where a descriptor
> > > maps to > 1 different buffers in qemu vaddr.
> >
> > Right, so we need to deal with the case when SVQ is out of space.
> >
> >
> > > So I think that some
> > > check after marking descriptors as used is a must somehow.
> >
> > I thought it should be before processing the available buffer?
>
> Yes, I meant after that. Somehow, because I include checking the
> number of sg buffers as "processing". :).
>
> > It's
> > the guest driver that makes sure there's sufficient space in the used
> > ring?
> >
>
> (I think we are saying the same thing in different words, but just in
> case, I will develop the idea here with an example.)
>
> The guest is able to check if there is enough space in the SVQ's
> vring, but not in the device's vring. As an example of this, imagine
> that a guest makes available a GPA contiguous buffer of 64K, one
> descriptor. However, this memory is divided into 16 chunks of 4K in
> qemu's VA space. Imagine that at this moment there are only eight
> slots free in each vring, and that neither communication is using
> indirect descriptors.
>
> The guest only needs 1 descriptor available to make that buffer
> available, so it will add it to the avail ring. But SVQ needs 16 chained
> descriptors, so the buffer is not going to reach the device until it
> marks at least 8 more descriptors as used. SVQ checked for the amount
> of available room, as you said, but it cannot forward the available
> one.
>
> Since the guest already sent kick when it made the descriptor
> available, we need another mechanism to know when we have all the
> needed free slots in the SVQ vring. And that's what I meant with the
> check after marking some buffers as available.
>
> I still think it is not worth it to protect the forwarding methods from
> hogging the BQL, since there must be a limit sooner or later, but it is
> something that is worth putting on the table again. But this requires
> changes for the next version for sure.

Ok.

>
> I can think of more scenarios, like the guest making available an indirect
> descriptor of vq size that needs to be split into even more sgs. Qemu
> already does not support more than 1024 sg buffers in VirtQueue, but
> a driver (such as SVQ) must *not* create an indirect descriptor chain
> longer than the Queue Size. Should we always increase the vq size to
> 1024? I think these are highly unlikely, but again these concerns
> must at least be commented on here.
>
> Does it make sense?

Right. So I think the SVQ code should be ready to handle all those cases.

Thanks

>
> Thanks!
>
> > Thanks
> >
> > >
> > >
> > > > Thanks
> > > >
> > > > >
> > > > > Thanks
> > > > >
> > > > > >
> > > > > > Thanks!
> > > > > >
> > > > > > >
> > > > > > > > +        /*
> > > > > > > > +         * Map cannot be NULL since iova map contains all guest space and
> > > > > > > > +         * qemu already has a physical address mapped
> > > > > > > > +         */
> > > > > > > > +        assert(map);
> > > > > > > > +
> > > > > > > > +        /*
> > > > > > > > +         * Map->iova chunk size is ignored. What to do if descriptor
> > > > > > > > +         * (addr, size) does not fit is delegated to the device.
> > > > > > > > +         */
> > > > > > > > +        off = needle.translated_addr - map->translated_addr;
> > > > > > > > +        iovec[i].iov_base = (void *)(map->iova + off);
> > > > > > > > +    }
> > > > > > > > +}
> > > > > > > > +
> > > > > > > >   static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
> > > > > > > >                                       const struct iovec *iovec,
> > > > > > > >                                       size_t num, bool more_descs, bool write)
> > > > > > > > @@ -156,8 +217,9 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
> > > > > > > >   }
> > > > > > > >
> > > > > > > >   static unsigned vhost_svq_add_split(VhostShadowVirtqueue *svq,
> > > > > > > > -                                    VirtQueueElement *elem)
> > > > > > > > +                                    SVQElement *svq_elem)
> > > > > > > >   {
> > > > > > > > +    VirtQueueElement *elem = &svq_elem->elem;
> > > > > > > >       int head;
> > > > > > > >       unsigned avail_idx;
> > > > > > > >       vring_avail_t *avail = svq->vring.avail;
> > > > > > > > @@ -167,6 +229,12 @@ static unsigned vhost_svq_add_split(VhostShadowVirtqueue *svq,
> > > > > > > >       /* We need some descriptors here */
> > > > > > > >       assert(elem->out_num || elem->in_num);
> > > > > > > >
> > > > > > > > +    vhost_svq_stash_addr(&svq_elem->in_sg_stash, elem->in_sg, elem->in_num);
> > > > > > > > +    vhost_svq_stash_addr(&svq_elem->out_sg_stash, elem->out_sg, elem->out_num);
> > > > > > >
> > > > > > >
> > > > > > > > I wonder if we can avoid tricks like stash and unstash by using
> > > > > > > > dedicated sgs in svq_elem, instead of reusing the elem.
> > > > > > >
> > > > > >
> > > > > > Actually yes, it would be way simpler to use a new sgs array in
> > > > > > svq_elem. I will change that.
> > > > > >
> > > > > > Thanks!
> > > > > >
> > > > > > > Thanks
> > > > > > >
> > > > > > >
> > > > > > > > +
> > > > > > > > +    vhost_svq_translate_addr(svq, elem->in_sg, elem->in_num);
> > > > > > > > +    vhost_svq_translate_addr(svq, elem->out_sg, elem->out_num);
> > > > > > > > +
> > > > > > > >       vhost_vring_write_descs(svq, elem->out_sg, elem->out_num,
> > > > > > > >                               elem->in_num > 0, false);
> > > > > > > >       vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);
> > > > > > > > @@ -187,7 +255,7 @@ static unsigned vhost_svq_add_split(VhostShadowVirtqueue *svq,
> > > > > > > >
> > > > > > > >   }
> > > > > > > >
> > > > > > > > -static void vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
> > > > > > > > +static void vhost_svq_add(VhostShadowVirtqueue *svq, SVQElement *elem)
> > > > > > > >   {
> > > > > > > >       unsigned qemu_head = vhost_svq_add_split(svq, elem);
> > > > > > > >
> > > > > > > > @@ -221,7 +289,7 @@ static void vhost_handle_guest_kick(EventNotifier *n)
> > > > > > > >           }
> > > > > > > >
> > > > > > > >           while (true) {
> > > > > > > > -            VirtQueueElement *elem = virtqueue_pop(svq->vq, sizeof(*elem));
> > > > > > > > +            SVQElement *elem = virtqueue_pop(svq->vq, sizeof(*elem));
> > > > > > > >               if (!elem) {
> > > > > > > >                   break;
> > > > > > > >               }
> > > > > > > > @@ -247,7 +315,7 @@ static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
> > > > > > > >       return svq->used_idx != svq->shadow_used_idx;
> > > > > > > >   }
> > > > > > > >
> > > > > > > > -static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq)
> > > > > > > > +static SVQElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq)
> > > > > > > >   {
> > > > > > > >       vring_desc_t *descs = svq->vring.desc;
> > > > > > > >       const vring_used_t *used = svq->vring.used;
> > > > > > > > @@ -279,7 +347,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq)
> > > > > > > >       descs[used_elem.id].next = svq->free_head;
> > > > > > > >       svq->free_head = used_elem.id;
> > > > > > > >
> > > > > > > > -    svq->ring_id_maps[used_elem.id]->len = used_elem.len;
> > > > > > > > +    svq->ring_id_maps[used_elem.id]->elem.len = used_elem.len;
> > > > > > > >       return g_steal_pointer(&svq->ring_id_maps[used_elem.id]);
> > > > > > > >   }
> > > > > > > >
> > > > > > > > @@ -296,12 +364,19 @@ static void vhost_svq_handle_call_no_test(EventNotifier *n)
> > > > > > > >
> > > > > > > >           vhost_svq_set_notification(svq, false);
> > > > > > > >           while (true) {
> > > > > > > > -            g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq);
> > > > > > > > -            if (!elem) {
> > > > > > > > +            g_autofree SVQElement *svq_elem = vhost_svq_get_buf(svq);
> > > > > > > > +            VirtQueueElement *elem;
> > > > > > > > +            if (!svq_elem) {
> > > > > > > >                   break;
> > > > > > > >               }
> > > > > > > >
> > > > > > > >               assert(i < svq->vring.num);
> > > > > > > > +            elem = &svq_elem->elem;
> > > > > > > > +
> > > > > > > > +            vhost_svq_unstash_addr(svq_elem->in_sg_stash, elem->in_sg,
> > > > > > > > +                                   elem->in_num);
> > > > > > > > +            vhost_svq_unstash_addr(svq_elem->out_sg_stash, elem->out_sg,
> > > > > > > > +                                   elem->out_num);
> > > > > > > >               virtqueue_fill(vq, elem, elem->len, i++);
> > > > > > > >           }
> > > > > > > >
> > > > > > > > @@ -451,14 +526,24 @@ void vhost_svq_stop(struct vhost_dev *dev, unsigned idx,
> > > > > > > >       event_notifier_set_handler(&svq->host_notifier, NULL);
> > > > > > > >
> > > > > > > >       for (i = 0; i < svq->vring.num; ++i) {
> > > > > > > > -        g_autofree VirtQueueElement *elem = svq->ring_id_maps[i];
> > > > > > > > +        g_autofree SVQElement *svq_elem = svq->ring_id_maps[i];
> > > > > > > > +        VirtQueueElement *elem;
> > > > > > > > +
> > > > > > > > +        if (!svq_elem) {
> > > > > > > > +            continue;
> > > > > > > > +        }
> > > > > > > > +
> > > > > > > > +        elem = &svq_elem->elem;
> > > > > > > > +        vhost_svq_unstash_addr(svq_elem->in_sg_stash, elem->in_sg,
> > > > > > > > +                               elem->in_num);
> > > > > > > > +        vhost_svq_unstash_addr(svq_elem->out_sg_stash, elem->out_sg,
> > > > > > > > +                               elem->out_num);
> > > > > > > > +
> > > > > > > >           /*
> > > > > > > >            * Although the doc says we must unpop in order, it's ok to unpop
> > > > > > > >            * everything.
> > > > > > > >            */
> > > > > > > > -        if (elem) {
> > > > > > > > -            virtqueue_unpop(svq->vq, elem, elem->len);
> > > > > > > > -        }
> > > > > > > > +        virtqueue_unpop(svq->vq, elem, elem->len);
> > > > > > > >       }
> > > > > > > >   }
> > > > > > > >
> > > > > > > > @@ -466,7 +551,8 @@ void vhost_svq_stop(struct vhost_dev *dev, unsigned idx,
> > > > > > > >    * Creates vhost shadow virtqueue, and instruct vhost device to use the shadow
> > > > > > > >    * methods and file descriptors.
> > > > > > > >    */
> > > > > > > > -VhostShadowVirtqueue *vhost_svq_new(struct vhost_dev *dev, int idx)
> > > > > > > > +VhostShadowVirtqueue *vhost_svq_new(struct vhost_dev *dev, int idx,
> > > > > > > > +                                    VhostIOVATree *iova_map)
> > > > > > > >   {
> > > > > > > >       int vq_idx = dev->vq_index + idx;
> > > > > > > >       unsigned num = virtio_queue_get_num(dev->vdev, vq_idx);
> > > > > > > > @@ -500,11 +586,13 @@ VhostShadowVirtqueue *vhost_svq_new(struct vhost_dev *dev, int idx)
> > > > > > > >       memset(svq->vring.desc, 0, driver_size);
> > > > > > > >       svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size);
> > > > > > > >       memset(svq->vring.used, 0, device_size);
> > > > > > > > +    svq->iova_map = iova_map;
> > > > > > > > +
> > > > > > > >       for (i = 0; i < num - 1; i++) {
> > > > > > > >           svq->vring.desc[i].next = cpu_to_le16(i + 1);
> > > > > > > >       }
> > > > > > > >
> > > > > > > > -    svq->ring_id_maps = g_new0(VirtQueueElement *, num);
> > > > > > > > +    svq->ring_id_maps = g_new0(SVQElement *, num);
> > > > > > > >       event_notifier_set_handler(&svq->call_notifier,
> > > > > > > >                                  vhost_svq_handle_call);
> > > > > > > >       return g_steal_pointer(&svq);
> > > > > > > > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > > > > > > > index a9c680b487..f5a12fee9d 100644
> > > > > > > > --- a/hw/virtio/vhost-vdpa.c
> > > > > > > > +++ b/hw/virtio/vhost-vdpa.c
> > > > > > > > @@ -176,6 +176,18 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
> > > > > > > >                                            vaddr, section->readonly);
> > > > > > > >
> > > > > > > >       llsize = int128_sub(llend, int128_make64(iova));
> > > > > > > > +    if (v->shadow_vqs_enabled) {
> > > > > > > > +        VhostDMAMap mem_region = {
> > > > > > > > +            .translated_addr = vaddr,
> > > > > > > > +            .size = int128_get64(llsize) - 1,
> > > > > > > > +            .perm = IOMMU_ACCESS_FLAG(true, section->readonly),
> > > > > > > > +        };
> > > > > > > > +
> > > > > > > > +        int r = vhost_iova_tree_alloc(v->iova_map, &mem_region);
> > > > > > > > +        assert(r == VHOST_DMA_MAP_OK);
> > > > > > > > +
> > > > > > > > +        iova = mem_region.iova;
> > > > > > > > +    }
> > > > > > > >
> > > > > > > >       ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
> > > > > > > >                                vaddr, section->readonly);
> > > > > > > > @@ -754,6 +766,23 @@ static bool  vhost_vdpa_force_iommu(struct vhost_dev *dev)
> > > > > > > >       return true;
> > > > > > > >   }
> > > > > > > >
> > > > > > > > +static int vhost_vdpa_get_iova_range(struct vhost_dev *dev,
> > > > > > > > +                                     hwaddr *first, hwaddr *last)
> > > > > > > > +{
> > > > > > > > +    int ret;
> > > > > > > > +    struct vhost_vdpa_iova_range range;
> > > > > > > > +
> > > > > > > > +    ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_IOVA_RANGE, &range);
> > > > > > > > +    if (ret != 0) {
> > > > > > > > +        return ret;
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    *first = range.first;
> > > > > > > > +    *last = range.last;
> > > > > > > > +    trace_vhost_vdpa_get_iova_range(dev, *first, *last);
> > > > > > > > +    return ret;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > >   /**
> > > > > > > >    * Maps QEMU vaddr memory to device in a suitable way for shadow virtqueue:
> > > > > > > >    * - It always reference qemu memory address, not guest's memory.
> > > > > > > > @@ -881,6 +910,7 @@ static bool vhost_vdpa_svq_start_vq(struct vhost_dev *dev, unsigned idx)
> > > > > > > >   static unsigned vhost_vdpa_enable_svq(struct vhost_vdpa *v, bool enable)
> > > > > > > >   {
> > > > > > > >       struct vhost_dev *hdev = v->dev;
> > > > > > > > +    hwaddr iova_first, iova_last;
> > > > > > > >       unsigned n;
> > > > > > > >       int r;
> > > > > > > >
> > > > > > > > @@ -894,7 +924,7 @@ static unsigned vhost_vdpa_enable_svq(struct vhost_vdpa *v, bool enable)
> > > > > > > >           /* Allocate resources */
> > > > > > > >           assert(v->shadow_vqs->len == 0);
> > > > > > > >           for (n = 0; n < hdev->nvqs; ++n) {
> > > > > > > > -            VhostShadowVirtqueue *svq = vhost_svq_new(hdev, n);
> > > > > > > > +            VhostShadowVirtqueue *svq = vhost_svq_new(hdev, n, v->iova_map);
> > > > > > > >               if (unlikely(!svq)) {
> > > > > > > >                   g_ptr_array_set_size(v->shadow_vqs, 0);
> > > > > > > >                   return 0;
> > > > > > > > @@ -903,6 +933,8 @@ static unsigned vhost_vdpa_enable_svq(struct vhost_vdpa *v, bool enable)
> > > > > > > >           }
> > > > > > > >       }
> > > > > > > >
> > > > > > > > +    r = vhost_vdpa_get_iova_range(hdev, &iova_first, &iova_last);
> > > > > > > > +    assert(r == 0);
> > > > > > > >       r = vhost_vdpa_vring_pause(hdev);
> > > > > > > >       assert(r == 0);
> > > > > > > >
> > > > > > > > @@ -913,6 +945,12 @@ static unsigned vhost_vdpa_enable_svq(struct vhost_vdpa *v, bool enable)
> > > > > > > >           }
> > > > > > > >       }
> > > > > > > >
> > > > > > > > +    memory_listener_unregister(&v->listener);
> > > > > > > > +    if (vhost_vdpa_dma_unmap(v, iova_first,
> > > > > > > > +                             (iova_last - iova_first) & TARGET_PAGE_MASK)) {
> > > > > > > > +        error_report("Fail to invalidate device iotlb");
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > >       /* Reset device so it can be configured */
> > > > > > > >       r = vhost_vdpa_dev_start(hdev, false);
> > > > > > > >       assert(r == 0);
> > > > > > > > diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
> > > > > > > > index 8ed19e9d0c..650e521e35 100644
> > > > > > > > --- a/hw/virtio/trace-events
> > > > > > > > +++ b/hw/virtio/trace-events
> > > > > > > > @@ -52,6 +52,7 @@ vhost_vdpa_set_vring_call(void *dev, unsigned int index, int fd) "dev: %p index:
> > > > > > > >   vhost_vdpa_get_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRIx64
> > > > > > > >   vhost_vdpa_set_owner(void *dev) "dev: %p"
> > > > > > > >   vhost_vdpa_vq_get_addr(void *dev, void *vq, uint64_t desc_user_addr, uint64_t avail_user_addr, uint64_t used_user_addr) "dev: %p vq: %p desc_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64
> > > > > > > > +vhost_vdpa_get_iova_range(void *dev, uint64_t first, uint64_t last) "dev: %p first: 0x%"PRIx64" last: 0x%"PRIx64
> > > > > > > >
> > > > > > > >   # virtio.c
> > > > > > > >   virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u"
> > > > > > >
> > > > > >
> > > >
> > >
> >
>

_______________________________________________
Virtualization mailing list
Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx
https://lists.linuxfoundation.org/mailman/listinfo/virtualization




[Index of Archives]     [KVM Development]     [Libvirt Development]     [Libvirt Users]     [CentOS Virtualization]     [Netdev]     [Ethernet Bridging]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux