Re: [PATCH v3 15/15] iommufd: Add a selftest

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Oct 25, 2022 at 03:12:24PM -0300, Jason Gunthorpe wrote:

> +static void iommufd_test_access_unmap(void *data, unsigned long iova,
> +				      unsigned long length)
> +{
> +	unsigned long iova_last = iova + length - 1;
> +	struct selftest_access *staccess = data;
> +	struct selftest_access_item *item;
> +	struct selftest_access_item *tmp;
> +
> +	spin_lock(&staccess->lock);
> +	list_for_each_entry_safe(item, tmp, &staccess->items, items_elm) {
> +		if (iova > item->iova_end || iova_last < item->iova)
> +			continue;
> +		list_del(&item->items_elm);
> +		spin_unlock(&staccess->lock);
> +		iommufd_access_unpin_pages(staccess->access, item->iova,
> +					   item->length);
> +		kfree(item);
> +		spin_lock(&staccess->lock);
> +	}
> +	spin_unlock(&staccess->lock);
> +}

> +static int iommufd_test_access_pages(struct iommufd_ucmd *ucmd,
> +				     unsigned int access_id, unsigned long iova,
> +				     size_t length, void __user *uptr,
> +				     u32 flags)
> +{
> +	struct iommu_test_cmd *cmd = ucmd->cmd;
> +	struct selftest_access_item *item;
> +	struct selftest_access *staccess;
> +	struct page **pages;
> +	size_t npages;
> +	int rc;
> +
> +	if (flags & ~MOCK_FLAGS_ACCESS_WRITE)
> +		return -EOPNOTSUPP;
> +
> +	staccess = iommufd_access_get(access_id);
> +	if (IS_ERR(staccess))
> +		return PTR_ERR(staccess);
> +
> +	npages = (ALIGN(iova + length, PAGE_SIZE) -
> +		  ALIGN_DOWN(iova, PAGE_SIZE)) /
> +		 PAGE_SIZE;
> +	pages = kvcalloc(npages, sizeof(*pages), GFP_KERNEL_ACCOUNT);
> +	if (!pages) {
> +		rc = -ENOMEM;
> +		goto out_put;
> +	}
> +
> +	rc = iommufd_access_pin_pages(staccess->access, iova, length, pages,
> +				      flags & MOCK_FLAGS_ACCESS_WRITE);
> +	if (rc)
> +		goto out_free_pages;
> +
> +	rc = iommufd_test_check_pages(
> +		uptr - (iova - ALIGN_DOWN(iova, PAGE_SIZE)), pages, npages);
> +	if (rc)
> +		goto out_unaccess;
> +
> +	item = kzalloc(sizeof(*item), GFP_KERNEL_ACCOUNT);
> +	if (!item) {
> +		rc = -ENOMEM;
> +		goto out_unaccess;
> +	}
> +
> +	item->iova = iova;
> +	item->length = length;
> +	spin_lock(&staccess->lock);
> +	item->id = staccess->next_id++;
> +	list_add_tail(&item->items_elm, &staccess->items);
> +	spin_unlock(&staccess->lock);

I haven't been remarking on the bugs that syzkaller finds in the test
suite itself (sigh), but this one is surprising and complicated enough
to deserve some wider attention.

VFIO has a protocol which has been mapped into iommufd allowing an
external driver to convert IOVA to struct page *. iommufd natively
represents this as the sequence:

  access = iommufd_access_create(ops)
  iommufd_access_pin_pages(access, iova, length, pages)
  iommufd_access_unpin_pages(access, iova, length)

One of the quirks of the VFIO design is that if userspace does an
unmap then the unmap shall succeed, but like in a HW iommu, the above
pin_pages is revoked and the external driver must stop accessing that
memory. iommufd achieves this by calling a callback:

static const struct iommufd_access_ops selftest_access_ops = {
	.unmap = iommufd_test_access_unmap,
};

Which has the invariant that upon return the unpin_pages must be
completed.

This all sounds simple enough, but when you throw syzkaller at this
and it generates all kinds of races, you end up with something like this:

            CPU1                         CPU2                 CPU3
    iommufd_access_create()
    iommufd_access_pin_pages()
                                       unmap_all()
                                         iommufd_test_access_unmap()
                                                            unmap_all()
                                                             iommufd_test_access_unmap()

    spin_lock(&staccess->lock);
    list_add_tail(&item->items_elm, &staccess->items);

And of course since the list_add_tail is in the wrong order it means
iommufd_test_access_unmap() doesn't see it and doesn't undo it,
triggering a WARN_ON.

The only way I can see to solve this is to hold a serializing lock
across iommufd_access_pin_pages() so that no
iommufd_test_access_unmap() can progress until both the pin is
completed and the record of the pin is stored.

Fortunately in the iommufd design we can hold a lock like this across
these calls, and in the op callback, without deadlocking. I can't
recall if vfio can do the same, I suspect not since I had in my mind I
needed to avoid that kind of locking for deadlock reasons..

I doubt any mdev drivers do this properly, so this will be some
oddball bugs. Thankfully it doesn't harm kernel integrity, but it does
leave a mess for a userspace vIOMMU which is tracking a guest command
to unmap an IOVA range and the kernel chucked out a WARN_ON and told
it EDEADLOCK. I guess sleep and retry?

Anyhow, the below seems to have fixed it. And this is the last open
syzkaller bug, the rest were dups of the prior one. Now we wait for it
to find something else.

Jason

@@ -420,7 +420,7 @@ static int iommufd_test_md_check_refs(struct iommufd_ucmd *ucmd,
 struct selftest_access {
 	struct iommufd_access *access;
 	struct file *file;
-	spinlock_t lock;
+	struct mutex lock;
 	struct list_head items;
 	unsigned int next_id;
 	bool destroying;
@@ -458,19 +458,17 @@ static void iommufd_test_access_unmap(void *data, unsigned long iova,
 	struct selftest_access_item *item;
 	struct selftest_access_item *tmp;
 
-	spin_lock(&staccess->lock);
+	mutex_lock(&staccess->lock);
 	list_for_each_entry_safe(item, tmp, &staccess->items, items_elm) {
 		if (iova > item->iova + item->length - 1 ||
 		    iova_last < item->iova)
 			continue;
 		list_del(&item->items_elm);
-		spin_unlock(&staccess->lock);
 		iommufd_access_unpin_pages(staccess->access, item->iova,
 					   item->length);
 		kfree(item);
-		spin_lock(&staccess->lock);
 	}
-	spin_unlock(&staccess->lock);
+	mutex_unlock(&staccess->lock);
 }
 
 static int iommufd_test_access_item_destroy(struct iommufd_ucmd *ucmd,
@@ -484,19 +482,19 @@ static int iommufd_test_access_item_destroy(struct iommufd_ucmd *ucmd,
 	if (IS_ERR(staccess))
 		return PTR_ERR(staccess);
 
-	spin_lock(&staccess->lock);
+	mutex_lock(&staccess->lock);
 	list_for_each_entry(item, &staccess->items, items_elm) {
 		if (item->id == item_id) {
 			list_del(&item->items_elm);
-			spin_unlock(&staccess->lock);
 			iommufd_access_unpin_pages(staccess->access, item->iova,
 						   item->length);
+			mutex_unlock(&staccess->lock);
 			kfree(item);
 			fput(staccess->file);
 			return 0;
 		}
 	}
-	spin_unlock(&staccess->lock);
+	mutex_unlock(&staccess->lock);
 	fput(staccess->file);
 	return -ENOENT;
 }
@@ -510,6 +508,7 @@ static int iommufd_test_staccess_release(struct inode *inode,
 		iommufd_test_access_unmap(staccess, 0, ULONG_MAX);
 		iommufd_access_destroy(staccess->access);
 	}
+	mutex_destroy(&staccess->lock);
 	kfree(staccess);
 	return 0;
 }
@@ -536,7 +535,7 @@ static struct selftest_access *iommufd_test_alloc_access(void)
 	if (!staccess)
 		return ERR_PTR(-ENOMEM);
 	INIT_LIST_HEAD(&staccess->items);
-	spin_lock_init(&staccess->lock);
+	mutex_init(&staccess->lock);
 
 	filep = anon_inode_getfile("[iommufd_test_staccess]",
 				   &iommfd_test_staccess_fops, staccess,
@@ -662,10 +661,20 @@ static int iommufd_test_access_pages(struct iommufd_ucmd *ucmd,
 		goto out_put;
 	}
 
+	/*
+	 * Drivers will need to think very carefully about this locking. The
+	 * core code can do multiple unmaps instantaneously after
+	 * iommufd_access_pin_pages() and *all* the unmaps must not return until
+	 * the range is unpinned. This simple implementation puts a global lock
+	 * around the pin, which may not suit drivers that want this to be a
+	 * performance path. drivers that get this wrong will trigger WARN_ON
+	 * races and cause EDEADLOCK failures to userspace.
+	 */
+	mutex_lock(&staccess->lock);
 	rc = iommufd_access_pin_pages(staccess->access, iova, length, pages,
 				      flags & MOCK_FLAGS_ACCESS_WRITE);
 	if (rc)
-		goto out_free_pages;
+		goto out_unlock;
 
 	/* For syzkaller allow uptr to be NULL to skip this check */
 	if (uptr) {
@@ -684,25 +693,22 @@ static int iommufd_test_access_pages(struct iommufd_ucmd *ucmd,
 
 	item->iova = iova;
 	item->length = length;
-	spin_lock(&staccess->lock);
 	item->id = staccess->next_id++;
 	list_add_tail(&item->items_elm, &staccess->items);
-	spin_unlock(&staccess->lock);
 
 	cmd->access_pages.out_access_pages_id = item->id;
 	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
 	if (rc)
 		goto out_free_item;
-	goto out_free_pages;
+	goto out_unlock;
 
 out_free_item:
-	spin_lock(&staccess->lock);
 	list_del(&item->items_elm);
-	spin_unlock(&staccess->lock);
 	kfree(item);
 out_unaccess:
 	iommufd_access_unpin_pages(staccess->access, iova, length);
-out_free_pages:
+out_unlock:
+	mutex_unlock(&staccess->lock);
 	kvfree(pages);
 out_put:
 	fput(staccess->file);



[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux