Re: [PATCH Kernel v20 5/8] vfio iommu: Implementation of ioctl for dirty pages tracking

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 5/15/2020 8:45 PM, Alex Williamson wrote:
On Fri, 15 May 2020 16:44:38 +0530
Kirti Wankhede <kwankhede@xxxxxxxxxx> wrote:

On 5/15/2020 3:35 PM, Yan Zhao wrote:
On Fri, May 15, 2020 at 02:07:44AM +0530, Kirti Wankhede wrote:
VFIO_IOMMU_DIRTY_PAGES ioctl performs three operations:
- Start dirty pages tracking while migration is active
- Stop dirty pages tracking.
- Get dirty pages bitmap. Its user space application's responsibility to
    copy content of dirty pages from source to destination during migration.

To prevent DoS attack, memory for bitmap is allocated per vfio_dma
structure. Bitmap size is calculated considering smallest supported page
size. Bitmap is allocated for all vfio_dmas when dirty logging is enabled

Bitmap is populated for already pinned pages when bitmap is allocated for
a vfio_dma with the smallest supported page size. Update bitmap from
pinning functions when tracking is enabled. When user application queries
bitmap, check if requested page size is same as page size used to
populated bitmap. If it is equal, copy bitmap, but if not equal, return
error.

Signed-off-by: Kirti Wankhede <kwankhede@xxxxxxxxxx>
Reviewed-by: Neo Jia <cjia@xxxxxxxxxx>

Fixed error reported by build bot by changing pgsize type from uint64_t
to size_t.
Reported-by: kbuild test robot <lkp@xxxxxxxxx>
---
   drivers/vfio/vfio_iommu_type1.c | 294 +++++++++++++++++++++++++++++++++++++++-
   1 file changed, 288 insertions(+), 6 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index de17787ffece..b76d3b14abfd 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -72,6 +72,7 @@ struct vfio_iommu {
   	uint64_t		pgsize_bitmap;
   	bool			v2;
   	bool			nesting;
+	bool			dirty_page_tracking;
   };
struct vfio_domain {
@@ -92,6 +93,7 @@ struct vfio_dma {
   	bool			lock_cap;	/* capable(CAP_IPC_LOCK) */
   	struct task_struct	*task;
   	struct rb_root		pfn_list;	/* Ex-user pinned pfn list */
+	unsigned long		*bitmap;
   };
struct vfio_group {
@@ -126,6 +128,19 @@ struct vfio_regions {
   #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)	\
   					(!list_empty(&iommu->domain_list))
+#define DIRTY_BITMAP_BYTES(n) (ALIGN(n, BITS_PER_TYPE(u64)) / BITS_PER_BYTE)
+
+/*
+ * Input argument of number of bits to bitmap_set() is unsigned integer, which
+ * further casts to signed integer for unaligned multi-bit operation,
+ * __bitmap_set().
+ * Then maximum bitmap size supported is 2^31 bits divided by 2^3 bits/byte,
+ * that is 2^28 (256 MB) which maps to 2^31 * 2^12 = 2^43 (8TB) on 4K page
+ * system.
+ */
+#define DIRTY_BITMAP_PAGES_MAX	 ((u64)INT_MAX)
+#define DIRTY_BITMAP_SIZE_MAX	 DIRTY_BITMAP_BYTES(DIRTY_BITMAP_PAGES_MAX)
+
   static int put_pfn(unsigned long pfn, int prot);
/*
@@ -176,6 +191,74 @@ static void vfio_unlink_dma(struct vfio_iommu *iommu, struct vfio_dma *old)
   	rb_erase(&old->node, &iommu->dma_list);
   }
+
+static int vfio_dma_bitmap_alloc(struct vfio_dma *dma, size_t pgsize)
+{
+	uint64_t npages = dma->size / pgsize;
+
+	if (npages > DIRTY_BITMAP_PAGES_MAX)
+		return -EINVAL;
+
+	dma->bitmap = kvzalloc(DIRTY_BITMAP_BYTES(npages), GFP_KERNEL);
+	if (!dma->bitmap)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void vfio_dma_bitmap_free(struct vfio_dma *dma)
+{
+	kfree(dma->bitmap);
+	dma->bitmap = NULL;
+}
+
+static void vfio_dma_populate_bitmap(struct vfio_dma *dma, size_t pgsize)
+{
+	struct rb_node *p;
+
+	for (p = rb_first(&dma->pfn_list); p; p = rb_next(p)) {
+		struct vfio_pfn *vpfn = rb_entry(p, struct vfio_pfn, node);
+
+		bitmap_set(dma->bitmap, (vpfn->iova - dma->iova) / pgsize, 1);
+	}
+}
+
+static int vfio_dma_bitmap_alloc_all(struct vfio_iommu *iommu, size_t pgsize)
+{
+	struct rb_node *n = rb_first(&iommu->dma_list);
+
+	for (; n; n = rb_next(n)) {
+		struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
+		int ret;
+
+		ret = vfio_dma_bitmap_alloc(dma, pgsize);
+		if (ret) {
+			struct rb_node *p = rb_prev(n);
+
+			for (; p; p = rb_prev(p)) {
+				struct vfio_dma *dma = rb_entry(n,
+							struct vfio_dma, node);
+
+				vfio_dma_bitmap_free(dma);
+			}
+			return ret;
+		}
+		vfio_dma_populate_bitmap(dma, pgsize);
+	}
+	return 0;
+}
+
+static void vfio_dma_bitmap_free_all(struct vfio_iommu *iommu)
+{
+	struct rb_node *n = rb_first(&iommu->dma_list);
+
+	for (; n; n = rb_next(n)) {
+		struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
+
+		vfio_dma_bitmap_free(dma);
+	}
+}
+
   /*
    * Helper Functions for host iova-pfn list
    */
@@ -568,6 +651,17 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
   			vfio_unpin_page_external(dma, iova, do_accounting);
   			goto pin_unwind;
   		}
+
+		if (iommu->dirty_page_tracking) {
+			unsigned long pgshift = __ffs(iommu->pgsize_bitmap);
+
+			/*
+			 * Bitmap populated with the smallest supported page
+			 * size
+			 */
+			bitmap_set(dma->bitmap,
+				   (vpfn->iova - dma->iova) >> pgshift, 1);
+		}
   	}
ret = i;
@@ -802,6 +896,7 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
   	vfio_unmap_unpin(iommu, dma, true);
   	vfio_unlink_dma(iommu, dma);
   	put_task_struct(dma->task);
+	vfio_dma_bitmap_free(dma);
   	kfree(dma);
   	iommu->dma_avail++;
   }
@@ -829,6 +924,80 @@ static void vfio_pgsize_bitmap(struct vfio_iommu *iommu)
   	}
   }
+static int update_user_bitmap(u64 __user *bitmap, struct vfio_dma *dma,
+			      dma_addr_t base_iova, size_t pgsize)
+{
+	unsigned long pgshift = __ffs(pgsize);
+	unsigned long nbits = dma->size >> pgshift;
+	unsigned long bit_offset = (dma->iova - base_iova) >> pgshift;
+	unsigned long copy_offset = bit_offset / BITS_PER_LONG;
+	unsigned long shift = bit_offset % BITS_PER_LONG;
+	unsigned long leftover;
+
+	if (shift) {
+		bitmap_shift_left(dma->bitmap, dma->bitmap, shift,
+				  nbits + shift);
+
+		if (copy_from_user(&leftover, (u64 *)bitmap + copy_offset,
+				   sizeof(leftover)))
+			return -EFAULT;
+
+		bitmap_or(dma->bitmap, dma->bitmap, &leftover, shift);
+	}
+
+	if (copy_to_user((u64 *)bitmap + copy_offset, dma->bitmap,
+			 DIRTY_BITMAP_BYTES(nbits + shift)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
+				  dma_addr_t iova, size_t size, size_t pgsize)
+{
+	struct vfio_dma *dma;
+	dma_addr_t i = iova, limit = iova + size;
+	unsigned long pgshift = __ffs(pgsize);
+	size_t sz = size;
+	int ret;
+
+	while ((dma = vfio_find_dma(iommu, i, sz))) {
not quite get the logic here.
if (i, i + size) is intersecting with (dma->iova, dma->iova + dma->size),
and a dma is found here, why the whole bitmap is cleared and copied?

This works with multiple but full vfio_dma, not intersects of vfio_dma,
similar to unmap ioctl.

I don't see that the VFIO_IOMMU_DIRTY_PAGES ioctl validates that like
VFIO_IOMMU_UNMAP_DMA does though.  We should have the same test as
vfio_dma_do_unmap() to verify that they user range doesn't bisect a
mapping, otherwise Yan is right, it looks like we allow the user to
specify an arbitrary range that might bisect a bitmap, but we clear and
attempt to copy the entire bitmap to the user buffer regardless.

Sure, updating patch. I'll send next version in some time.

Thanks,
Kirti



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux