Re: [PATCH v3 8/15] iommufd: Algorithms for PFN storage

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Oct 25, 2022 at 03:12:17PM -0300, Jason Gunthorpe wrote:

> +int iopt_pages_fill_xarray(struct iopt_pages *pages, unsigned long start_index,
> +			   unsigned long last_index, struct page **out_pages)
> +{
> +	struct interval_tree_double_span_iter span;
> +	unsigned long xa_end = start_index;
> +	struct pfn_reader_user user;
> +	int rc;
> +
> +	pfn_reader_user_init(&user, pages);
> +	user.upages_len = last_index - start_index + 1;

Syzkaller found that upages_len is supposed to be in bytes, not number
of pages. Which is surprising the test suite didn't find it.

-       user.upages_len = last_index - start_index + 1;
+       user.upages_len = (last_index - start_index + 1) * sizeof(*out_pages);

While fixing the test suite to cover this I also discovered this:

@@ -129,7 +129,7 @@ void interval_tree_span_iter_advance(struct interval_tree_span_iter *iter,
                return;
 
        iter->first_index = new_index;
-       if (new_index == iter->last_index) {
+       if (new_index > iter->last_index) {
                iter->is_hole = -1;
                return;
        }

Where the span iterator would not behave properly at its limit,
causing some chaos.

While thinking about iommufd_access_pin_pages(), which is the API that
trigggered all these problems, I realized we need to upgrade the
iova_alignment when an access is created to at least PAGE_SIZE if the
external driver is going to call iommufd_access_pin_pages(). Otherwise
the API can't work right because there is no way to communicate
offsets in the struct pages returned when pinning. Use of RW doesn't
require this so I made it a flag, which is convient for the test suite
that assumes odd alignments for RW testing. See below

Finally, the test suite didn't cover the unmap while access exists
flow, and the code forgot to set the ops pointer:

@@ -435,6 +425,10 @@ iommufd_access_create(struct iommufd_ctx *ictx,
u32 ioas_id,
        if (IS_ERR(access))
                return access;
 
+       access->data = data;
+       access->ops = ops;
+       access->ictx = ictx;

Jason

diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index d1af0389dfab83..737897a2dcfc3c 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -26,15 +26,6 @@ struct iommufd_device {
 	bool enforce_cache_coherency;
 };
 
-struct iommufd_access {
-	struct iommufd_object obj;
-	struct iommufd_ctx *ictx;
-	struct iommufd_ioas *ioas;
-	const struct iommufd_access_ops *ops;
-	void *data;
-	u32 ioas_access_list_id;
-};
-
 void iommufd_device_destroy(struct iommufd_object *obj)
 {
 	struct iommufd_device *idev =
@@ -413,8 +404,7 @@ void iommufd_access_destroy_object(struct iommufd_object *obj)
 	struct iommufd_access *access =
 		container_of(obj, struct iommufd_access, obj);
 
-	WARN_ON(xa_erase(&access->ioas->access_list,
-			 access->ioas_access_list_id) != access);
+	iopt_remove_access(&access->ioas->iopt, access);
 	iommufd_ctx_put(access->ictx);
 	refcount_dec(&access->ioas->obj.users);
 }
@@ -435,6 +425,10 @@ iommufd_access_create(struct iommufd_ctx *ictx, u32 ioas_id,
 	if (IS_ERR(access))
 		return access;
 
+	access->data = data;
+	access->ops = ops;
+	access->ictx = ictx;
+
 	obj = iommufd_get_object(ictx, ioas_id, IOMMUFD_OBJ_IOAS);
 	if (IS_ERR(obj)) {
 		rc = PTR_ERR(obj);
@@ -443,15 +437,16 @@ iommufd_access_create(struct iommufd_ctx *ictx, u32 ioas_id,
 	access->ioas = container_of(obj, struct iommufd_ioas, obj);
 	iommufd_put_object_keep_user(obj);
 
-	rc = xa_alloc(&access->ioas->access_list, &access->ioas_access_list_id,
-		      access, xa_limit_16b, GFP_KERNEL_ACCOUNT);
+	if (ops->needs_pin_pages)
+		access->iova_alignment = PAGE_SIZE;
+	else
+		access->iova_alignment = 1;
+	rc = iopt_add_access(&access->ioas->iopt, access);
 	if (rc)
 		goto out_put_ioas;
 
 	/* The calling driver is a user until iommufd_access_destroy() */
 	refcount_inc(&access->obj.users);
-	access->ictx = ictx;
-	access->data = data;
 	iommufd_ctx_get(ictx);
 	iommufd_object_finalize(ictx, &access->obj);
 	return access;
@@ -495,18 +490,18 @@ void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
 	struct iommufd_access *access;
 	unsigned long index;
 
-	xa_lock(&ioas->access_list);
-	xa_for_each(&ioas->access_list, index, access) {
+	xa_lock(&ioas->iopt.access_list);
+	xa_for_each(&ioas->iopt.access_list, index, access) {
 		if (!iommufd_lock_obj(&access->obj))
 			continue;
-		xa_unlock(&ioas->access_list);
+		xa_unlock(&ioas->iopt.access_list);
 
 		access->ops->unmap(access->data, iova, length);
 
 		iommufd_put_object(&access->obj);
-		xa_lock(&ioas->access_list);
+		xa_lock(&ioas->iopt.access_list);
 	}
-	xa_unlock(&ioas->access_list);
+	xa_unlock(&ioas->iopt.access_list);
 }
 
 /**
@@ -591,6 +586,11 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
 	bool first = true;
 	int rc;
 
+	/* Driver didn't specify needs_pin_pages in its ops */
+	if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
+	    WARN_ON(access->iova_alignment != PAGE_SIZE))
+		return -EINVAL;
+
 	if (!length)
 		return -EINVAL;
 	if (check_add_overflow(iova, length - 1, &last_iova))
diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
index bb5cb19417d696..5d4d48c80d3ad8 100644
--- a/drivers/iommu/iommufd/io_pagetable.c
+++ b/drivers/iommu/iommufd/io_pagetable.c
@@ -505,7 +505,8 @@ static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start,
 			up_read(&iopt->domains_rwsem);
 			iommufd_access_notify_unmap(iopt, area_first,
 						    iopt_area_length(area));
-			WARN_ON(READ_ONCE(area->num_accesses));
+			if (WARN_ON(READ_ONCE(area->num_accesses)))
+				return -EDEADLOCK;
 			goto again;
 		}
 
@@ -643,6 +644,7 @@ int iopt_init_table(struct io_pagetable *iopt)
 	iopt->allowed_itree = RB_ROOT_CACHED;
 	iopt->reserved_itree = RB_ROOT_CACHED;
 	xa_init_flags(&iopt->domains, XA_FLAGS_ACCOUNT);
+	xa_init_flags(&iopt->access_list, XA_FLAGS_ALLOC);
 
 	/*
 	 * iopt's start as SW tables that can use the entire size_t IOVA space
@@ -669,6 +671,7 @@ void iopt_destroy_table(struct io_pagetable *iopt)
 
 	WARN_ON(!RB_EMPTY_ROOT(&iopt->reserved_itree.rb_root));
 	WARN_ON(!xa_empty(&iopt->domains));
+	WARN_ON(!xa_empty(&iopt->access_list));
 	WARN_ON(!RB_EMPTY_ROOT(&iopt->area_itree.rb_root));
 }
 
@@ -802,9 +805,12 @@ static int iopt_check_iova_alignment(struct io_pagetable *iopt,
 				     unsigned long new_iova_alignment)
 {
 	unsigned long align_mask = new_iova_alignment - 1;
+	struct iommufd_access *access;
 	struct iopt_area *area;
+	unsigned long index;
 
 	lockdep_assert_held(&iopt->iova_rwsem);
+	lockdep_assert_held(&iopt->domains_rwsem);
 
 	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
 	     area = iopt_area_iter_next(area, 0, ULONG_MAX))
@@ -812,6 +818,12 @@ static int iopt_check_iova_alignment(struct io_pagetable *iopt,
 		    (iopt_area_length(area) & align_mask) ||
 		    (area->page_offset & align_mask))
 			return -EADDRINUSE;
+
+	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
+		xa_for_each(&iopt->access_list, index, access)
+			if (WARN_ON(access->iova_alignment >
+				    new_iova_alignment))
+				return -EADDRINUSE;
 	return 0;
 }
 
@@ -896,10 +908,12 @@ int iopt_table_add_domain(struct io_pagetable *iopt,
 static int iopt_calculate_iova_alignment(struct io_pagetable *iopt)
 {
 	unsigned long new_iova_alignment;
+	struct iommufd_access *access;
 	struct iommu_domain *domain;
 	unsigned long index;
 
 	lockdep_assert_held_write(&iopt->iova_rwsem);
+	lockdep_assert_held(&iopt->domains_rwsem);
 
 	if (iopt->disable_large_pages)
 		new_iova_alignment = PAGE_SIZE;
@@ -910,6 +924,11 @@ static int iopt_calculate_iova_alignment(struct io_pagetable *iopt)
 		new_iova_alignment = max_t(unsigned long,
 					   1UL << __ffs(domain->pgsize_bitmap),
 					   new_iova_alignment);
+	xa_for_each(&iopt->access_list, index, access)
+		new_iova_alignment = max_t(unsigned long,
+					   access->iova_alignment,
+					   new_iova_alignment);
+
 	if (new_iova_alignment > iopt->iova_alignment) {
 		int rc;
 
@@ -1106,6 +1125,41 @@ int iopt_disable_large_pages(struct io_pagetable *iopt)
 	return rc;
 }
 
+int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access)
+{
+	int rc;
+
+	down_write(&iopt->domains_rwsem);
+	down_write(&iopt->iova_rwsem);
+	rc = xa_alloc(&iopt->access_list, &access->iopt_access_list_id, access,
+		      xa_limit_16b, GFP_KERNEL_ACCOUNT);
+	if (rc)
+		goto out_unlock;
+
+	rc = iopt_calculate_iova_alignment(iopt);
+	if (rc) {
+		xa_erase(&iopt->access_list, access->iopt_access_list_id);
+		goto out_unlock;
+	}
+
+out_unlock:
+	up_write(&iopt->iova_rwsem);
+	up_write(&iopt->domains_rwsem);
+	return rc;
+}
+
+void iopt_remove_access(struct io_pagetable *iopt,
+			struct iommufd_access *access)
+{
+	down_write(&iopt->domains_rwsem);
+	down_write(&iopt->iova_rwsem);
+	WARN_ON(xa_erase(&iopt->access_list, access->iopt_access_list_id) !=
+		access);
+	WARN_ON(iopt_calculate_iova_alignment(iopt));
+	up_write(&iopt->iova_rwsem);
+	up_write(&iopt->domains_rwsem);
+}
+
 /* Narrow the valid_iova_itree to include reserved ranges from a group. */
 int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt,
 					  struct device *device,
diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c
index c32a87f11c55be..068055272fc5b5 100644
--- a/drivers/iommu/iommufd/ioas.c
+++ b/drivers/iommu/iommufd/ioas.c
@@ -17,7 +17,6 @@ void iommufd_ioas_destroy(struct iommufd_object *obj)
 	rc = iopt_unmap_all(&ioas->iopt, NULL);
 	WARN_ON(rc && rc != -ENOENT);
 	iopt_destroy_table(&ioas->iopt);
-	WARN_ON(!xa_empty(&ioas->access_list));
 	mutex_destroy(&ioas->mutex);
 }
 
@@ -36,7 +35,6 @@ struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx)
 
 	INIT_LIST_HEAD(&ioas->hwpt_list);
 	mutex_init(&ioas->mutex);
-	xa_init_flags(&ioas->access_list, XA_FLAGS_ALLOC);
 	return ioas;
 
 out_abort:
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 47d18388dc24fa..783fbbf0b732d4 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -34,6 +34,7 @@ struct iommufd_ctx {
 struct io_pagetable {
 	struct rw_semaphore domains_rwsem;
 	struct xarray domains;
+	struct xarray access_list;
 	unsigned int next_domain_id;
 
 	struct rw_semaphore iova_rwsem;
@@ -205,7 +206,6 @@ struct iommufd_ioas {
 	struct io_pagetable iopt;
 	struct mutex mutex;
 	struct list_head hwpt_list;
-	struct xarray access_list;
 };
 
 static inline struct iommufd_ioas *iommufd_get_ioas(struct iommufd_ucmd *ucmd,
@@ -256,10 +256,22 @@ void iommufd_hw_pagetable_destroy(struct iommufd_object *obj);
 
 void iommufd_device_destroy(struct iommufd_object *obj);
 
+struct iommufd_access {
+	struct iommufd_object obj;
+	struct iommufd_ctx *ictx;
+	struct iommufd_ioas *ioas;
+	const struct iommufd_access_ops *ops;
+	void *data;
+	unsigned long iova_alignment;
+	u32 iopt_access_list_id;
+};
+
+int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access);
+void iopt_remove_access(struct io_pagetable *iopt,
+			struct iommufd_access *access);
 void iommufd_access_destroy_object(struct iommufd_object *obj);
 
 #ifdef CONFIG_IOMMUFD_TEST
-struct iommufd_access;
 struct iommufd_hw_pagetable *
 iommufd_device_selftest_attach(struct iommufd_ctx *ictx,
 			       struct iommufd_ioas *ioas,
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index fc253a4d2f8e77..7a5d64a1dae482 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -29,6 +29,7 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id,
 void iommufd_device_detach(struct iommufd_device *idev);
 
 struct iommufd_access_ops {
+	u8 needs_pin_pages : 1;
 	void (*unmap)(void *data, unsigned long iova, unsigned long length);
 };
 



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux