在 2022/2/27 下午9:41, Eugenio Pérez 写道:
Use translations added in VhostIOVATree in SVQ.
Only introduce usage here, not allocation and deallocation. As with
previous patches, we use the dead code paths of shadow_vqs_enabled to
avoid commiting too many changes at once. These are impossible to take
at the moment.
Signed-off-by: Eugenio Pérez <eperezma@xxxxxxxxxx>
---
hw/virtio/vhost-shadow-virtqueue.h | 6 +-
include/hw/virtio/vhost-vdpa.h | 3 +
hw/virtio/vhost-shadow-virtqueue.c | 76 ++++++++++++++++-
hw/virtio/vhost-vdpa.c | 128 ++++++++++++++++++++++++-----
4 files changed, 187 insertions(+), 26 deletions(-)
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index 04c67685fd..b2f722d101 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -13,6 +13,7 @@
#include "qemu/event_notifier.h"
#include "hw/virtio/virtio.h"
#include "standard-headers/linux/vhost_types.h"
+#include "hw/virtio/vhost-iova-tree.h"
/* Shadow virtqueue to relay notifications */
typedef struct VhostShadowVirtqueue {
@@ -43,6 +44,9 @@ typedef struct VhostShadowVirtqueue {
/* Virtio device */
VirtIODevice *vdev;
+ /* IOVA mapping */
+ VhostIOVATree *iova_tree;
+
/* Map for use the guest's descriptors */
VirtQueueElement **ring_id_maps;
@@ -78,7 +82,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
VirtQueue *vq);
void vhost_svq_stop(VhostShadowVirtqueue *svq);
-VhostShadowVirtqueue *vhost_svq_new(void);
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree);
void vhost_svq_free(gpointer vq);
G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index 009a9f3b6b..ee8e939ad0 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -14,6 +14,7 @@
#include <gmodule.h>
+#include "hw/virtio/vhost-iova-tree.h"
#include "hw/virtio/virtio.h"
#include "standard-headers/linux/vhost_types.h"
@@ -30,6 +31,8 @@ typedef struct vhost_vdpa {
MemoryListener listener;
struct vhost_vdpa_iova_range iova_range;
bool shadow_vqs_enabled;
+ /* IOVA mapping used by the Shadow Virtqueue */
+ VhostIOVATree *iova_tree;
GPtrArray *shadow_vqs;
struct vhost_dev *dev;
VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index a38d313755..7e073773d1 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -11,6 +11,7 @@
#include "hw/virtio/vhost-shadow-virtqueue.h"
#include "qemu/error-report.h"
+#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/log.h"
#include "linux-headers/linux/vhost.h"
@@ -84,7 +85,58 @@ static void vhost_svq_set_notification(VhostShadowVirtqueue *svq, bool enable)
}
}
+/**
+ * Translate addresses between the qemu's virtual address and the SVQ IOVA
+ *
+ * @svq Shadow VirtQueue
+ * @vaddr Translated IOVA addresses
+ * @iovec Source qemu's VA addresses
+ * @num Length of iovec and minimum length of vaddr
+ */
+static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
+ void **addrs, const struct iovec *iovec,
+ size_t num)
+{
+ if (num == 0) {
+ return true;
+ }
+
+ for (size_t i = 0; i < num; ++i) {
+ DMAMap needle = {
+ .translated_addr = (hwaddr)iovec[i].iov_base,
+ .size = iovec[i].iov_len,
+ };
+ size_t off;
+
+ const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
+ /*
+ * Map cannot be NULL since iova map contains all guest space and
+ * qemu already has a physical address mapped
+ */
+ if (unlikely(!map)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "Invalid address 0x%"HWADDR_PRIx" given by guest",
+ needle.translated_addr);
+ return false;
+ }
+
+ off = needle.translated_addr - map->translated_addr;
+ addrs[i] = (void *)(map->iova + off);
+
+ if (unlikely(int128_gt(int128_add(needle.translated_addr,
+ iovec[i].iov_len),
+ map->translated_addr + map->size))) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "Guest buffer expands over iova range");
+ return false;
+ }
+ }
+
+ return true;
+}
+
static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
+ void * const *vaddr_sg,
Nit: it looks to me we are not passing vaddr but iova here, so it might
be better to use "sg"?
const struct iovec *iovec,
size_t num, bool more_descs, bool write)
{
@@ -103,7 +155,7 @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
} else {
descs[i].flags = flags;
}
- descs[i].addr = cpu_to_le64((hwaddr)iovec[n].iov_base);
+ descs[i].addr = cpu_to_le64((hwaddr)vaddr_sg[n]);
descs[i].len = cpu_to_le32(iovec[n].iov_len);
last = i;
@@ -119,6 +171,8 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
{
unsigned avail_idx;
vring_avail_t *avail = svq->vring.avail;
+ bool ok;
+ g_autofree void **sgs = g_new(void *, MAX(elem->out_num, elem->in_num));
*head = svq->free_head;
@@ -129,9 +183,20 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
return false;
}
- vhost_vring_write_descs(svq, elem->out_sg, elem->out_num,
+ ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num);
+ if (unlikely(!ok)) {
+ return false;
+ }
+ vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
elem->in_num > 0, false);
- vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);
+
+
+ ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num);
+ if (unlikely(!ok)) {
+ return false;
+ }
+
+ vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true);
/*
* Put the entry in the available array (but don't update avail->idx until
@@ -514,11 +579,13 @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
* Creates vhost shadow virtqueue, and instructs the vhost device to use the
* shadow methods and file descriptors.
*
+ * @iova_tree Tree to perform descriptors translations
+ *
* Returns the new virtqueue or NULL.
*
* In case of error, reason is reported through error_report.
*/
-VhostShadowVirtqueue *vhost_svq_new(void)
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree)
{
g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
int r;
@@ -539,6 +606,7 @@ VhostShadowVirtqueue *vhost_svq_new(void)
event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
+ svq->iova_tree = iova_tree;
return g_steal_pointer(&svq);
err_init_hdev_call:
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 435b9c2e9e..56f9f125cd 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -209,6 +209,21 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
vaddr, section->readonly);
llsize = int128_sub(llend, int128_make64(iova));
+ if (v->shadow_vqs_enabled) {
+ DMAMap mem_region = {
+ .translated_addr = (hwaddr)vaddr,
+ .size = int128_get64(llsize) - 1,
+ .perm = IOMMU_ACCESS_FLAG(true, section->readonly),
+ };
+
+ int r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region);
+ if (unlikely(r != IOVA_OK)) {
+ error_report("Can't allocate a mapping (%d)", r);
+ goto fail;
+ }
+
+ iova = mem_region.iova;
+ }
vhost_vdpa_iotlb_batch_begin_once(v);
ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
@@ -261,6 +276,20 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
llsize = int128_sub(llend, int128_make64(iova));
+ if (v->shadow_vqs_enabled) {
+ const DMAMap *result;
+ const void *vaddr = memory_region_get_ram_ptr(section->mr) +
+ section->offset_within_region +
+ (iova - section->offset_within_address_space);
+ DMAMap mem_region = {
+ .translated_addr = (hwaddr)vaddr,
+ .size = int128_get64(llsize) - 1,
+ };
+
+ result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region);
+ iova = result->iova;
+ vhost_iova_tree_remove(v->iova_tree, &mem_region);
+ }
vhost_vdpa_iotlb_batch_begin_once(v);
ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
if (ret) {
@@ -383,7 +412,7 @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
for (unsigned n = 0; n < hdev->nvqs; ++n) {
- g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();
+ g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree);
if (unlikely(!svq)) {
error_setg(errp, "Cannot create svq %u", n);
@@ -834,37 +863,78 @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
/**
* Unmap a SVQ area in the device
*/
-static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr iova,
- hwaddr size)
+static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v,
+ const DMAMap *needle)
{
+ const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle);
+ hwaddr size;
int r;
- size = ROUND_UP(size, qemu_real_host_page_size);
- r = vhost_vdpa_dma_unmap(v, iova, size);
+ if (unlikely(!result)) {
+ error_report("Unable to find SVQ address to unmap");
+ return false;
+ }
+
+ size = ROUND_UP(result->size, qemu_real_host_page_size);
+ r = vhost_vdpa_dma_unmap(v, result->iova, size);
return r == 0;
}
static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
const VhostShadowVirtqueue *svq)
{
+ DMAMap needle;
struct vhost_vdpa *v = dev->opaque;
struct vhost_vring_addr svq_addr;
- size_t device_size = vhost_svq_device_area_size(svq);
- size_t driver_size = vhost_svq_driver_area_size(svq);
bool ok;
vhost_svq_get_vring_addr(svq, &svq_addr);
- ok = vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr, driver_size);
+ needle = (DMAMap) {
+ .translated_addr = svq_addr.desc_user_addr,
+ };
Let's simply initialize the member to zero during start of this function
then we can use needle->transalted_addr = XXX here.
+ ok = vhost_vdpa_svq_unmap_ring(v, &needle);
if (unlikely(!ok)) {
return false;
}
- return vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr, device_size);
+ needle = (DMAMap) {
+ .translated_addr = svq_addr.used_user_addr,
+ };
+ return vhost_vdpa_svq_unmap_ring(v, &needle);
+}
+
+/**
+ * Map the SVQ area in the device
+ *
+ * @v Vhost-vdpa device
+ * @needle The area to search iova
+ * @errorp Error pointer
+ */
+static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle,
+ Error **errp)
+{
+ int r;
+
+ r = vhost_iova_tree_map_alloc(v->iova_tree, needle);
+ if (unlikely(r != IOVA_OK)) {
+ error_setg(errp, "Cannot allocate iova (%d)", r);
+ return false;
+ }
+
+ r = vhost_vdpa_dma_map(v, needle->iova, needle->size,
+ (void *)needle->translated_addr,
+ !(needle->perm & IOMMU_ACCESS_FLAG(0, 1)));
Let's simply use needle->perm == IOMMU_RO here?
+ if (unlikely(r != 0)) {
+ error_setg_errno(errp, -r, "Cannot map region to device");
+ vhost_iova_tree_remove(v->iova_tree, needle);
+ }
+
+ return r == 0;
}
/**
- * Map shadow virtqueue rings in device
+ * Map the shadow virtqueue rings in the device
*
* @dev The vhost device
* @svq The shadow virtqueue
@@ -876,28 +946,44 @@ static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
struct vhost_vring_addr *addr,
Error **errp)
{
+ DMAMap device_region, driver_region;
+ struct vhost_vring_addr svq_addr;
struct vhost_vdpa *v = dev->opaque;
size_t device_size = vhost_svq_device_area_size(svq);
size_t driver_size = vhost_svq_driver_area_size(svq);
- int r;
+ size_t avail_offset;
+ bool ok;
ERRP_GUARD();
- vhost_svq_get_vring_addr(svq, addr);
+ vhost_svq_get_vring_addr(svq, &svq_addr);
- r = vhost_vdpa_dma_map(v, addr->desc_user_addr, driver_size,
- (void *)addr->desc_user_addr, true);
- if (unlikely(r != 0)) {
- error_setg_errno(errp, -r, "Cannot create vq driver region: ");
+ driver_region = (DMAMap) {
+ .translated_addr = svq_addr.desc_user_addr,
+ .size = driver_size - 1,
Any reason for the "-1" here? I see several places do things like that,
it's probably hint of wrong API somehwere.
Thanks
+ .perm = IOMMU_RO,
+ };
+ ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp);
+ if (unlikely(!ok)) {
+ error_prepend(errp, "Cannot create vq driver region: ");
return false;
}
+ addr->desc_user_addr = driver_region.iova;
+ avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr;
+ addr->avail_user_addr = driver_region.iova + avail_offset;
- r = vhost_vdpa_dma_map(v, addr->used_user_addr, device_size,
- (void *)addr->used_user_addr, false);
- if (unlikely(r != 0)) {
- error_setg_errno(errp, -r, "Cannot create vq device region: ");
+ device_region = (DMAMap) {
+ .translated_addr = svq_addr.used_user_addr,
+ .size = device_size - 1,
+ .perm = IOMMU_RW,
+ };
+ ok = vhost_vdpa_svq_map_ring(v, &device_region, errp);
+ if (unlikely(!ok)) {
+ error_prepend(errp, "Cannot create vq device region: ");
+ vhost_vdpa_svq_unmap_ring(v, &driver_region);
}
+ addr->used_user_addr = device_region.iova;
- return r == 0;
+ return ok;
}
static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
_______________________________________________
Virtualization mailing list
Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx
https://lists.linuxfoundation.org/mailman/listinfo/virtualization