On Tue, 10 Apr 2018 16:18:59 +0800
Yulei Zhang <yulei.zhang@xxxxxxxxx> wrote:

> Corresponding to the V4 migration patch set for vfio pci device,
> this patch is to implement the new ioctl VFIO_IOMMU_GET_DIRTY_BITMAP
> to fulfill the requirement for vfio-mdev device live migration, which
> need copy the memory that has been pinned in iommu container to the
> target VM for mdev device status restore.
>
> Signed-off-by: Yulei Zhang <yulei.zhang@xxxxxxxxx>
> ---
>  drivers/vfio/vfio_iommu_type1.c | 42 +++++++++++++++++++++++++++++++++++++++++
>  include/uapi/linux/vfio.h       | 14 ++++++++++++++
>  2 files changed, 56 insertions(+)
>
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 5c212bf..6cd2142 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -41,6 +41,7 @@
>  #include <linux/notifier.h>
>  #include <linux/dma-iommu.h>
>  #include <linux/irqdomain.h>
> +#include <linux/vmalloc.h>
>
>  #define DRIVER_VERSION  "0.2"
>  #define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@xxxxxxxxxx>"
> @@ -1658,6 +1659,23 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
>  	return ret;
>  }
>
> +static void vfio_dma_update_dirty_bitmap(struct vfio_iommu *iommu,
> +				u64 start_addr, u64 npage, void *bitmap)
> +{
> +	u64 iova = start_addr;
> +	struct vfio_dma *dma;
> +	int i;
> +
> +	for (i = 0; i < npage; i++) {
> +		dma = vfio_find_dma(iommu, iova, PAGE_SIZE);
> +		if (dma)
> +			if (vfio_find_vpfn(dma, iova))
> +				set_bit(i, bitmap);

This seems to conflate the vendor driver's working data set with the
dirty data set; is that valid?

> +
> +		iova += PAGE_SIZE;
> +	}
> +}
> +
>  static long vfio_iommu_type1_ioctl(void *iommu_data,
>  				   unsigned int cmd, unsigned long arg)
>  {
> @@ -1728,6 +1746,30 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
>
>  		return copy_to_user((void __user *)arg, &unmap, minsz) ?
>  			-EFAULT : 0;
> +	} else if (cmd == VFIO_IOMMU_GET_DIRTY_BITMAP) {
> +		struct vfio_iommu_get_dirty_bitmap d;
> +		unsigned long bitmap_sz;
> +		unsigned int *bitmap;
> +
> +		minsz = offsetofend(struct vfio_iommu_get_dirty_bitmap,
> +				    page_nr);
> +
> +		if (copy_from_user(&d, (void __user *)arg, minsz))
> +			return -EFAULT;
> +
> +		bitmap_sz = (BITS_TO_LONGS(d.page_nr) + 1) *
> +				sizeof(unsigned long);
> +		bitmap = vzalloc(bitmap_sz);

This is an exploit waiting to happen: a kernel allocation based on a
user-provided field with no limit or bounds checking.

> +		vfio_dma_update_dirty_bitmap(iommu, d.start_addr,
> +					d.page_nr, bitmap);
> +
> +		if (copy_to_user((void __user *)arg + minsz,
> +				bitmap, bitmap_sz)) {
> +			vfree(bitmap);
> +			return -EFAULT;
> +		}
> +		vfree(bitmap);
> +		return 0;
>  	}
>
>  	return -ENOTTY;
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 1aa7b82..d4fd5af 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -665,6 +665,20 @@ struct vfio_iommu_type1_dma_unmap {
>  #define VFIO_IOMMU_ENABLE	_IO(VFIO_TYPE, VFIO_BASE + 15)
>  #define VFIO_IOMMU_DISABLE	_IO(VFIO_TYPE, VFIO_BASE + 16)
>
> +/**
> + * VFIO_IOMMU_GET_DIRTY_BITMAP - _IOW(VFIO_TYPE, VFIO_BASE + 17,
> + *				struct vfio_iommu_get_dirty_bitmap)
> + *
> + * Return: 0 on success, -errno on failure.
> + */
> +struct vfio_iommu_get_dirty_bitmap {
> +	__u64	start_addr;
> +	__u64	page_nr;
> +	__u8	dirty_bitmap[];
> +};

This does not follow the vfio standard calling convention of argsz and
flags.  Do we even need an ioctl here, or could we use a region for
exposing a dirty bitmap?
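For illustration only, something along these lines would at least
follow the existing argsz/flags convention and bound the allocation
(DIRTY_BITMAP_PAGES_MAX is a made-up limit, I'm not suggesting this is
the right interface):

	struct vfio_iommu_get_dirty_bitmap {
		__u32	argsz;
		__u32	flags;
		__u64	start_addr;
		__u64	page_nr;
		__u8	dirty_bitmap[];
	};

	} else if (cmd == VFIO_IOMMU_GET_DIRTY_BITMAP) {
		struct vfio_iommu_get_dirty_bitmap d;
		unsigned long bitmap_sz;
		unsigned long *bitmap;

		minsz = offsetofend(struct vfio_iommu_get_dirty_bitmap,
				    page_nr);

		if (copy_from_user(&d, (void __user *)arg, minsz))
			return -EFAULT;

		if (d.argsz < minsz || d.flags)
			return -EINVAL;

		/* Bound the user-controlled size before allocating;
		 * DIRTY_BITMAP_PAGES_MAX is only a placeholder limit. */
		if (!d.page_nr || d.page_nr > DIRTY_BITMAP_PAGES_MAX)
			return -EINVAL;

		bitmap_sz = BITS_TO_LONGS(d.page_nr) * sizeof(unsigned long);

		/* User buffer must be large enough to hold the bitmap. */
		if (d.argsz < minsz + bitmap_sz)
			return -EINVAL;

		bitmap = vzalloc(bitmap_sz);
		if (!bitmap)
			return -ENOMEM;

		vfio_dma_update_dirty_bitmap(iommu, d.start_addr,
					     d.page_nr, bitmap);

		if (copy_to_user((void __user *)arg + minsz,
				 bitmap, bitmap_sz)) {
			vfree(bitmap);
			return -EFAULT;
		}
		vfree(bitmap);
		return 0;
	}

That still leaves the bigger question of whether an ioctl-based bitmap
is the right mechanism at all.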
Juan, any input on better options than bitmaps?  Thanks,

Alex

> +
> +#define VFIO_IOMMU_GET_DIRTY_BITMAP	_IO(VFIO_TYPE, VFIO_BASE + 17)
> +
>  /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
>
>  /*