Add the corresponding APIs for performing VFIO dirty tracking, particularly the VFIO_IOMMU_DIRTY_PAGES ioctl subcmds: * VFIO_IOMMU_DIRTY_PAGES_FLAG_START: Starts dirty tracking and allocates the area @dirty_bitmap * VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP: Stops dirty tracking and frees the area @dirty_bitmap * VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP: Fetches the dirty bitmap while dirty tracking is active. Advertise VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION, which reports the domain's configured page size (the same as iopt::iova_alignment) and a maximum dirty bitmap size identical to VFIO's. Compared to the VFIO type1 iommu, perpetual dirtying is not implemented, and userspace gets -EOPNOTSUPP, which is handled by today's userspace. Move the iommufd_get_pagesizes() definition before iommufd_vfio_unmap_dma() so that its dirty tracking support can validate the user bitmap page size against the IOPT page size. Signed-off-by: Joao Martins <joao.m.martins@xxxxxxxxxx> --- drivers/iommu/iommufd/vfio_compat.c | 221 ++++++++++++++++++++++++++-- 1 file changed, 209 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/iommufd/vfio_compat.c b/drivers/iommu/iommufd/vfio_compat.c index dbe39404a105..2802f49cc10d 100644 --- a/drivers/iommu/iommufd/vfio_compat.c +++ b/drivers/iommu/iommufd/vfio_compat.c @@ -56,6 +56,16 @@ create_compat_ioas(struct iommufd_ctx *ictx) return ioas; } +static u64 iommufd_get_pagesizes(struct iommufd_ioas *ioas) +{ + /* FIXME: See vfio_update_pgsize_bitmap(), for compat this should return + * the high bits too, and we need to decide if we should report that + * iommufd supports less than PAGE_SIZE alignment or stick to strict + * compatibility. qemu only cares about the first set bit. 
+ */ + return ioas->iopt.iova_alignment; +} + int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd) { struct iommu_vfio_ioas *cmd = ucmd->cmd; @@ -130,9 +140,14 @@ static int iommufd_vfio_unmap_dma(struct iommufd_ctx *ictx, unsigned int cmd, void __user *arg) { size_t minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size); - u32 supported_flags = VFIO_DMA_UNMAP_FLAG_ALL; + u32 supported_flags = VFIO_DMA_UNMAP_FLAG_ALL | + VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP; + struct iommufd_dirty_data dirty, *dirtyp = NULL; struct vfio_iommu_type1_dma_unmap unmap; + struct vfio_bitmap bitmap; struct iommufd_ioas *ioas; + unsigned long pgshift; + size_t pgsize; int rc; if (copy_from_user(&unmap, arg, minsz)) @@ -141,14 +156,53 @@ static int iommufd_vfio_unmap_dma(struct iommufd_ctx *ictx, unsigned int cmd, if (unmap.argsz < minsz || unmap.flags & ~supported_flags) return -EINVAL; + if (unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) { + unsigned long npages; + + if (copy_from_user(&bitmap, + (void __user *)(arg + minsz), + sizeof(bitmap))) + return -EFAULT; + + if (!access_ok((void __user *)bitmap.data, bitmap.size)) + return -EINVAL; + + pgshift = __ffs(bitmap.pgsize); + npages = unmap.size >> pgshift; + + if (!npages || !bitmap.size || + (bitmap.size > DIRTY_BITMAP_SIZE_MAX) || + (bitmap.size < dirty_bitmap_bytes(npages))) + return -EINVAL; + + dirty.iova = unmap.iova; + dirty.length = unmap.size; + dirty.data = bitmap.data; + dirty.page_size = 1 << pgshift; + dirtyp = &dirty; + } + ioas = get_compat_ioas(ictx); if (IS_ERR(ioas)) return PTR_ERR(ioas); + pgshift = __ffs(iommufd_get_pagesizes(ioas)); + pgsize = (size_t)1 << pgshift; + + /* When dirty tracking is enabled, allow only min supported pgsize */ + if ((unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) && + (bitmap.pgsize != pgsize)) { + rc = -EINVAL; + goto out_put; + } + if (unmap.flags & VFIO_DMA_UNMAP_FLAG_ALL) rc = iopt_unmap_all(&ioas->iopt); else - rc = iopt_unmap_iova(&ioas->iopt, unmap.iova, unmap.size, 
NULL); + rc = iopt_unmap_iova(&ioas->iopt, unmap.iova, unmap.size, + dirtyp); + +out_put: iommufd_put_object(&ioas->obj); return rc; } @@ -222,16 +276,6 @@ static int iommufd_vfio_set_iommu(struct iommufd_ctx *ictx, unsigned long type) return 0; } -static u64 iommufd_get_pagesizes(struct iommufd_ioas *ioas) -{ - /* FIXME: See vfio_update_pgsize_bitmap(), for compat this should return - * the high bits too, and we need to decide if we should report that - * iommufd supports less than PAGE_SIZE alignment or stick to strict - * compatibility. qemu only cares about the first set bit. - */ - return ioas->iopt.iova_alignment; -} - static int iommufd_fill_cap_iova(struct iommufd_ioas *ioas, struct vfio_info_cap_header __user *cur, size_t avail) @@ -289,6 +333,26 @@ static int iommufd_fill_cap_dma_avail(struct iommufd_ioas *ioas, return sizeof(cap_dma); } +static int iommufd_fill_cap_migration(struct iommufd_ioas *ioas, + struct vfio_info_cap_header __user *cur, + size_t avail) +{ + struct vfio_iommu_type1_info_cap_migration cap_mig = { + .header = { + .id = VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION, + .version = 1, + }, + .flags = 0, + .pgsize_bitmap = (size_t) 1 << __ffs(iommufd_get_pagesizes(ioas)), + .max_dirty_bitmap_size = DIRTY_BITMAP_SIZE_MAX, + }; + + if (avail >= sizeof(cap_mig) && + copy_to_user(cur, &cap_mig, sizeof(cap_mig))) + return -EFAULT; + return sizeof(cap_mig); +} + static int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx, void __user *arg) { @@ -298,6 +362,7 @@ static int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx, static const fill_cap_fn fill_fns[] = { iommufd_fill_cap_iova, iommufd_fill_cap_dma_avail, + iommufd_fill_cap_migration, }; size_t minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes); struct vfio_info_cap_header __user *last_cap = NULL; @@ -364,6 +429,137 @@ static int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx, return rc; } +static int iommufd_vfio_dirty_pages_start(struct iommufd_ctx *ictx, + struct 
vfio_iommu_type1_dirty_bitmap *dirty) +{ + struct iommufd_ioas *ioas; + int ret = -EINVAL; + + ioas = get_compat_ioas(ictx); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + ret = iopt_set_dirty_tracking(&ioas->iopt, NULL, true); + + iommufd_put_object(&ioas->obj); + + return ret; +} + +static int iommufd_vfio_dirty_pages_stop(struct iommufd_ctx *ictx, + struct vfio_iommu_type1_dirty_bitmap *dirty) +{ + struct iommufd_ioas *ioas; + int ret; + + ioas = get_compat_ioas(ictx); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + ret = iopt_set_dirty_tracking(&ioas->iopt, NULL, false); + + iommufd_put_object(&ioas->obj); + + return ret; +} + +static int iommufd_vfio_dirty_pages_get_bitmap(struct iommufd_ctx *ictx, + struct vfio_iommu_type1_dirty_bitmap_get *range) +{ + struct iommufd_dirty_data bitmap; + uint64_t npages, bitmap_size; + struct iommufd_ioas *ioas; + unsigned long pgshift; + size_t iommu_pgsize; + int ret = -EINVAL; + + ioas = get_compat_ioas(ictx); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + down_read(&ioas->iopt.iova_rwsem); + pgshift = __ffs(range->bitmap.pgsize); + npages = range->size >> pgshift; + bitmap_size = range->bitmap.size; + + if (!npages || !bitmap_size || (bitmap_size > DIRTY_BITMAP_SIZE_MAX) || + (bitmap_size < dirty_bitmap_bytes(npages))) + goto out_put; + + iommu_pgsize = 1 << __ffs(iommufd_get_pagesizes(ioas)); + + /* allow only smallest supported pgsize */ + if (range->bitmap.pgsize != iommu_pgsize) + goto out_put; + + if (range->iova & (iommu_pgsize - 1)) + goto out_put; + + if (!range->size || range->size & (iommu_pgsize - 1)) + goto out_put; + + bitmap.iova = range->iova; + bitmap.length = range->size; + bitmap.data = range->bitmap.data; + bitmap.page_size = 1 << pgshift; + + ret = iopt_read_and_clear_dirty_data(&ioas->iopt, NULL, &bitmap); + +out_put: + up_read(&ioas->iopt.iova_rwsem); + iommufd_put_object(&ioas->obj); + return ret; +} + +static int iommufd_vfio_dirty_pages(struct iommufd_ctx *ictx, unsigned int cmd, + void __user 
*arg) +{ + size_t minsz = offsetofend(struct vfio_iommu_type1_dirty_bitmap, flags); + struct vfio_iommu_type1_dirty_bitmap dirty; + u32 supported_flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START | + VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP | + VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP; + int ret = 0; + + if (copy_from_user(&dirty, (void __user *)arg, minsz)) + return -EFAULT; + + if (dirty.argsz < minsz || dirty.flags & ~supported_flags) + return -EINVAL; + + /* only one flag should be set at a time */ + if (__ffs(dirty.flags) != __fls(dirty.flags)) + return -EINVAL; + + if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_START) { + ret = iommufd_vfio_dirty_pages_start(ictx, &dirty); + } else if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP) { + ret = iommufd_vfio_dirty_pages_stop(ictx, &dirty); + } else if (dirty.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP) { + struct vfio_iommu_type1_dirty_bitmap_get range; + size_t data_size = dirty.argsz - minsz; + + if (!data_size || data_size < sizeof(range)) + return -EINVAL; + + if (copy_from_user(&range, (void __user *)(arg + minsz), + sizeof(range))) + return -EFAULT; + + if (range.iova + range.size < range.iova) + return -EINVAL; + + if (!access_ok((void __user *)range.bitmap.data, + range.bitmap.size)) + return -EINVAL; + + ret = iommufd_vfio_dirty_pages_get_bitmap(ictx, &range); + } + + return ret; +} + + /* FIXME TODO: PowerPC SPAPR only: #define VFIO_IOMMU_ENABLE _IO(VFIO_TYPE, VFIO_BASE + 15) @@ -394,6 +590,7 @@ int iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd, case VFIO_IOMMU_UNMAP_DMA: return iommufd_vfio_unmap_dma(ictx, cmd, uarg); case VFIO_IOMMU_DIRTY_PAGES: + return iommufd_vfio_dirty_pages(ictx, cmd, uarg); default: return -ENOIOCTLCMD; } -- 2.17.2