Re: [PATCH V2 9/9] vfio/type1: block on invalid vaddr

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, 19 Jan 2021 09:48:29 -0800
Steve Sistare <steven.sistare@xxxxxxxxxx> wrote:

> Block translation of host virtual address while an iova range has an
> invalid vaddr.
> 
> Signed-off-by: Steve Sistare <steven.sistare@xxxxxxxxxx>
> ---
>  drivers/vfio/vfio_iommu_type1.c | 83 +++++++++++++++++++++++++++++++++++++----
>  1 file changed, 76 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 0167996..c97573a 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -31,6 +31,7 @@
>  #include <linux/rbtree.h>
>  #include <linux/sched/signal.h>
>  #include <linux/sched/mm.h>
> +#include <linux/kthread.h>
>  #include <linux/slab.h>
>  #include <linux/uaccess.h>
>  #include <linux/vfio.h>
> @@ -75,6 +76,7 @@ struct vfio_iommu {
>  	bool			dirty_page_tracking;
>  	bool			pinned_page_dirty_scope;
>  	bool			controlled;
> +	wait_queue_head_t	vaddr_wait;
>  };
>  
>  struct vfio_domain {
> @@ -145,6 +147,8 @@ struct vfio_regions {
>  #define DIRTY_BITMAP_PAGES_MAX	 ((u64)INT_MAX)
>  #define DIRTY_BITMAP_SIZE_MAX	 DIRTY_BITMAP_BYTES(DIRTY_BITMAP_PAGES_MAX)
>  
> +#define WAITED 1
> +
>  static int put_pfn(unsigned long pfn, int prot);
>  
>  static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu,
> @@ -505,6 +509,52 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
>  }
>  
>  /*
> + * Wait for vaddr of *dma_p to become valid.  iommu lock is dropped if the task
> + * waits, but is re-locked on return.  If the task waits, then return an updated
> + * dma struct in *dma_p.  Return 0 on success with no waiting, 1 on success if

s/1/WAITED/

> + * waited, and -errno on error.
> + */
> +static int vfio_vaddr_wait(struct vfio_iommu *iommu, struct vfio_dma **dma_p)
> +{
> +	struct vfio_dma *dma = *dma_p;
> +	unsigned long iova = dma->iova;
> +	size_t size = dma->size;
> +	int ret = 0;
> +	DEFINE_WAIT(wait);
> +
> +	while (!dma->vaddr_valid) {
> +		ret = WAITED;
> +		prepare_to_wait(&iommu->vaddr_wait, &wait, TASK_KILLABLE);
> +		mutex_unlock(&iommu->lock);
> +		schedule();
> +		mutex_lock(&iommu->lock);
> +		finish_wait(&iommu->vaddr_wait, &wait);
> +		if (kthread_should_stop() || !iommu->controlled ||
> +		    fatal_signal_pending(current)) {
> +			return -EFAULT;
> +		}
> +		*dma_p = dma = vfio_find_dma(iommu, iova, size);
> +		if (!dma)
> +			return -EINVAL;
> +	}
> +	return ret;
> +}
> +
> +/*
> + * Find dma struct and wait for its vaddr to be valid.  iommu lock is dropped
> + * if the task waits, but is re-locked on return.  Return result in *dma_p.
> + * Return 0 on success, 1 on success if waited,  and -errno on error.
> + */
> +static int vfio_find_vaddr(struct vfio_iommu *iommu, dma_addr_t start,
> +			   size_t size, struct vfio_dma **dma_p)

more of a vfio_find_dma_valid()

> +{
> +	*dma_p = vfio_find_dma(iommu, start, size);
> +	if (!*dma_p)
> +		return -EINVAL;
> +	return vfio_vaddr_wait(iommu, dma_p);
> +}
> +
> +/*
>   * Attempt to pin pages.  We really don't want to track all the pfns and
>   * the iommu can only map chunks of consecutive pfns anyway, so get the
>   * first page and all consecutive pages with the same locking.
> @@ -693,11 +743,11 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
>  		struct vfio_pfn *vpfn;
>  
>  		iova = user_pfn[i] << PAGE_SHIFT;
> -		dma = vfio_find_dma(iommu, iova, PAGE_SIZE);
> -		if (!dma) {
> -			ret = -EINVAL;
> +		ret = vfio_find_vaddr(iommu, iova, PAGE_SIZE, &dma);
> +		if (ret < 0)
>  			goto pin_unwind;
> -		}
> +		else if (ret == WAITED)
> +			do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu);

We're iterating through an array of pfns to provide translations, once
we've released the lock it's not just the current one that could be
invalid.  I'm afraid we need to unwind and try again, but I think it's
actually worse than that because if we've marked pages within a
vfio_dma's pfn_list as pinned, then during the locking gap it gets
unmapped, the unmap path will call the unmap notifier chain to release
the page that the vendor driver doesn't have yet.  Yikes!

>  
>  		if ((dma->prot & prot) != prot) {
>  			ret = -EPERM;
> @@ -1496,6 +1546,22 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
>  	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
>  	int ret;
>  
> +	/*
> +	 * Wait for all vaddr to be valid so they can be used in the main loop.
> +	 * If we do wait, the lock was dropped and re-taken, so start over to
> +	 * ensure the dma list is consistent.
> +	 */
> +again:
> +	for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) {
> +		struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
> +
> +		ret = vfio_vaddr_wait(iommu, &dma);
> +		if (ret < 0)
> +			return ret;
> +		else if (ret == WAITED)
> +			goto again;
> +	}

This "do nothing until all the vaddrs are valid" approach could work
above too, but gosh is that a lot of cache unfriendly work for a rare
invalidation.  Thanks,

Alex

> +
>  	/* Arbitrarily pick the first domain in the list for lookups */
>  	if (!list_empty(&iommu->domain_list))
>  		d = list_first_entry(&iommu->domain_list,
> @@ -2522,6 +2588,7 @@ static void *vfio_iommu_type1_open(unsigned long arg)
>  	iommu->controlled = true;
>  	mutex_init(&iommu->lock);
>  	BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
> +	init_waitqueue_head(&iommu->vaddr_wait);
>  
>  	return iommu;
>  }
> @@ -2972,12 +3039,13 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu,
>  	struct vfio_dma *dma;
>  	bool kthread = current->mm == NULL;
>  	size_t offset;
> +	int ret;
>  
>  	*copied = 0;
>  
> -	dma = vfio_find_dma(iommu, user_iova, 1);
> -	if (!dma)
> -		return -EINVAL;
> +	ret = vfio_find_vaddr(iommu, user_iova, 1, &dma);
> +	if (ret < 0)
> +		return ret;
>  
>  	if ((write && !(dma->prot & IOMMU_WRITE)) ||
>  			!(dma->prot & IOMMU_READ))
> @@ -3055,6 +3123,7 @@ static void vfio_iommu_type1_notify(void *iommu_data, unsigned int event,
>  	mutex_lock(&iommu->lock);
>  	iommu->controlled = false;
>  	mutex_unlock(&iommu->lock);
> +	wake_up_all(&iommu->vaddr_wait);
>  }
>  
>  static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux