The ioctl(iommufd, IOMMU_IOAS_UNMAP_DIRTY) performs an unmap of an IOVA range and returns whether or not it was dirty. The kernel atomically clears the IOPTE while telling if the old IOPTE was dirty or not. This in theory is needed for the vIOMMU case to handle a potentially erroneous guest PCI device performing DMA on an IOVA that is simultaneously being IOMMU-unmapped, in order to then transfer that dirty page into the destination. Signed-off-by: Joao Martins <joao.m.martins@xxxxxxxxxx> --- hw/iommufd/iommufd.c | 21 +++++++++++ hw/iommufd/trace-events | 1 + hw/vfio/iommufd.c | 72 +++++++++++++++++++++++++++++++++++- include/hw/iommufd/iommufd.h | 3 ++ 4 files changed, 96 insertions(+), 1 deletion(-) diff --git a/hw/iommufd/iommufd.c b/hw/iommufd/iommufd.c index bc870b5e9b2f..0f7d9f22ae52 100644 --- a/hw/iommufd/iommufd.c +++ b/hw/iommufd/iommufd.c @@ -243,6 +243,27 @@ int iommufd_get_dirty_iova(int iommufd, uint32_t hwpt_id, uint64_t iova, return !ret ? 0 : -errno; } +int iommufd_unmap_dma_dirty(int iommufd, uint32_t ioas, hwaddr iova, + ram_addr_t size, uint64_t page_size, uint64_t *data) +{ + int ret; + struct iommu_ioas_unmap_dirty unmap = { + .size = sizeof(unmap), + .ioas_id = ioas, + .bitmap = { + .iova = iova, .length = size, + .page_size = page_size, .data = (__u64 *)data, + }, + }; + + ret = ioctl(iommufd, IOMMU_IOAS_UNMAP_DIRTY, &unmap); + trace_iommufd_unmap_dma_dirty(iommufd, ioas, iova, size, page_size, ret); + if (ret) { + error_report("IOMMU_IOAS_UNMAP_DIRTY failed: %s", strerror(errno)); + } + return !ret ? 
0 : -errno; +} + static void iommufd_register_types(void) { qemu_mutex_init(&iommufd_lock); diff --git a/hw/iommufd/trace-events b/hw/iommufd/trace-events index 9fe2cc60c6fe..3e99290a9a77 100644 --- a/hw/iommufd/trace-events +++ b/hw/iommufd/trace-events @@ -11,3 +11,4 @@ iommufd_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, void * iommufd_copy_dma(int iommufd, uint32_t src_ioas, uint32_t dst_ioas, uint64_t iova, uint64_t size, bool readonly, int ret) " iommufd=%d src_ioas=%d dst_ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" readonly=%d (%d)" iommufd_set_dirty(int iommufd, uint32_t hwpt_id, bool start, int ret) " iommufd=%d hwpt=%d enable=%d (%d)" iommufd_get_dirty_iova(int iommufd, uint32_t hwpt_id, uint64_t iova, uint64_t size, uint64_t page_size, int ret) " iommufd=%d hwpt=%d iova=0x%"PRIx64" size=0x%"PRIx64" page_size=0x%"PRIx64" (%d)" +iommufd_unmap_dma_dirty(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, uint64_t page_size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" page_size=0x%"PRIx64" (%d)" diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c index 6c12239a40ab..d75ecbf2ae52 100644 --- a/hw/vfio/iommufd.c +++ b/hw/vfio/iommufd.c @@ -36,6 +36,8 @@ #include "exec/ram_addr.h" #include "migration/migration.h" +static bool vfio_devices_all_running_and_saving(VFIOContainer *container); + static bool iommufd_check_extension(VFIOContainer *bcontainer, VFIOContainerFeature feat) { @@ -72,6 +74,36 @@ static int iommufd_copy(VFIOContainer *src, VFIOContainer *dst, container_dst->ioas_id, iova, size, readonly); } +static int iommufd_unmap_bitmap(int iommufd, int ioas_id, hwaddr iova, + ram_addr_t size, ram_addr_t translated) +{ + unsigned long *data, pgsize, bitmap_size, pages; + int ret; + + pgsize = qemu_real_host_page_size; + pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size; + bitmap_size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) / + BITS_PER_BYTE; + data = g_try_malloc0(bitmap_size); + if (!data) { 
+ ret = -ENOMEM; + goto err_out; + } + + ret = iommufd_unmap_dma_dirty(iommufd, ioas_id, iova, size, pgsize, data); + if (ret) { + goto err_out; + } + + cpu_physical_memory_set_dirty_lebitmap(data, translated, pages); + + trace_vfio_get_dirty_bitmap(iommufd, iova, size, bitmap_size, translated); + +err_out: + g_free(data); + return ret; +} + static int iommufd_unmap(VFIOContainer *bcontainer, hwaddr iova, ram_addr_t size, IOMMUTLBEntry *iotlb) @@ -79,7 +111,13 @@ static int iommufd_unmap(VFIOContainer *bcontainer, VFIOIOMMUFDContainer *container = container_of(bcontainer, VFIOIOMMUFDContainer, obj); - /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */ + if (iotlb && bcontainer->dirty_pages_supported && + vfio_devices_all_running_and_saving(bcontainer)) { + return iommufd_unmap_bitmap(container->iommufd, + container->ioas_id, iova, size, + iotlb->translated_addr); + } + return iommufd_unmap_dma(container->iommufd, container->ioas_id, iova, size); } @@ -367,6 +405,38 @@ static int vfio_device_reset(VFIODevice *vbasedev) return 0; } +static bool vfio_devices_all_running_and_saving(VFIOContainer *bcontainer) +{ + MigrationState *ms = migrate_get_current(); + VFIOIOMMUFDContainer *container; + VFIODevice *vbasedev; + VFIOIOASHwpt *hwpt; + + if (!migration_is_setup_or_active(ms->state)) { + return false; + } + + container = container_of(bcontainer, VFIOIOMMUFDContainer, obj); + + QLIST_FOREACH(hwpt, &container->hwpt_list, next) { + QLIST_FOREACH(vbasedev, &hwpt->device_list, hwpt_next) { + VFIOMigration *migration = vbasedev->migration; + + if (!migration) { + return false; + } + + if ((migration->device_state & VFIO_DEVICE_STATE_SAVING) && + (migration->device_state & VFIO_DEVICE_STATE_RUNNING)) { + continue; + } else { + return false; + } + } + } + return true; +} + static bool vfio_iommufd_devices_all_dirty_tracking(VFIOContainer *bcontainer) { MigrationState *ms = migrate_get_current(); diff --git a/include/hw/iommufd/iommufd.h 
b/include/hw/iommufd/iommufd.h index 9b467e57723b..2c58b95d619c 100644 --- a/include/hw/iommufd/iommufd.h +++ b/include/hw/iommufd/iommufd.h @@ -36,5 +36,8 @@ int iommufd_copy_dma(int iommufd, uint32_t src_ioas, uint32_t dst_ioas, int iommufd_set_dirty_tracking(int iommufd, uint32_t hwpt_id, bool start); int iommufd_get_dirty_iova(int iommufd, uint32_t hwpt_id, uint64_t iova, ram_addr_t size, uint64_t page_size, uint64_t *data); +int iommufd_unmap_dma_dirty(int iommufd, uint32_t ioas, hwaddr iova, + ram_addr_t size, uint64_t page_size, + uint64_t *data); bool iommufd_supported(void); #endif /* HW_IOMMUFD_IOMMUFD_H */ -- 2.17.2