On Wed, 14 Dec 2022 11:22:47 -0800 Steve Sistare <steven.sistare@xxxxxxxxxx> wrote: > Disable the VFIO_UPDATE_VADDR capability if mediated devices are present. > Their kernel threads could be blocked indefinitely by a misbehaving > userland while trying to pin/unpin pages while vaddrs are being updated. > > Do not allow groups to be added to the container while vaddr's are invalid, > so we never need to block user threads from pinning, and can delete the > vaddr-waiting code in a subsequent patch. > > Fixes: c3cbab24db38 ("vfio/type1: implement interfaces to update vaddr") > > Signed-off-by: Steve Sistare <steven.sistare@xxxxxxxxxx> > --- > drivers/vfio/vfio_iommu_type1.c | 41 +++++++++++++++++++++++++++++++++++++++-- > include/uapi/linux/vfio.h | 15 +++++++++------ > 2 files changed, 48 insertions(+), 8 deletions(-) > > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > index 23c24fe..b04f485 100644 > --- a/drivers/vfio/vfio_iommu_type1.c > +++ b/drivers/vfio/vfio_iommu_type1.c > @@ -861,6 +861,12 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, > > mutex_lock(&iommu->lock); > > + if (iommu->vaddr_invalid_count) { > + WARN_ONCE(1, "mdev not allowed with VFIO_UPDATE_VADDR\n"); > + ret = -EIO; > + goto pin_done; > + } > + This simplifies to: if (WARN_ONCE(iommu->vaddr_invalid_count, "mdev not allowed with VFIO_UPDATE_VADDR\n")) { ret = -EIO; goto pin_done; } I was sort of figuring this would be a -EPERM or -EBUSY, maybe even -EAGAIN, though perhaps it's academic which errno to return if we should never get here. > /* > * Wait for all necessary vaddr's to be valid so they can be used in > * the main loop without dropping the lock, to avoid racing vs unmap. > @@ -1343,6 +1349,12 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, > > mutex_lock(&iommu->lock); > > + /* Cannot update vaddr if mdev is present. */ > + if (invalidate_vaddr && !list_empty(&iommu->emulated_iommu_groups)) { > + ret = -EIO; > + goto unlock; > + } > + On the other hand, this errno is reachable by the user, and I'm not sure -EIO is the best choice for a condition that's blocked due to use configuration. > pgshift = __ffs(iommu->pgsize_bitmap); > pgsize = (size_t)1 << pgshift; > > @@ -2185,11 +2197,16 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, > struct iommu_domain_geometry *geo; > LIST_HEAD(iova_copy); > LIST_HEAD(group_resv_regions); > - int ret = -EINVAL; > + int ret = -EIO; > > mutex_lock(&iommu->lock); > > + /* Attach could require pinning, so disallow while vaddr is invalid. */ > + if (iommu->vaddr_invalid_count) > + goto out_unlock; > + Also user reachable, so should track if we pick another errno. > /* Check for duplicates */ > + ret = -EINVAL; > if (vfio_iommu_find_iommu_group(iommu, iommu_group)) > goto out_unlock; > > @@ -2660,6 +2677,16 @@ static int vfio_domains_have_enforce_cache_coherency(struct vfio_iommu *iommu) > return ret; > } > > +static int vfio_iommu_has_emulated(struct vfio_iommu *iommu) > +{ > + int ret; > + > + mutex_lock(&iommu->lock); > + ret = !list_empty(&iommu->emulated_iommu_groups); > + mutex_unlock(&iommu->lock); > + return ret; > +} Nit, this could return bool. I suppose it doesn't because the below returns int, but it seems we're already in the realm of creating a boolean value there. > + > static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu, > unsigned long arg) > { > @@ -2668,8 +2695,13 @@ static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu, > case VFIO_TYPE1v2_IOMMU: > case VFIO_TYPE1_NESTING_IOMMU: > case VFIO_UNMAP_ALL: > - case VFIO_UPDATE_VADDR: > return 1; > + case VFIO_UPDATE_VADDR: > + /* > + * Disable this feature if mdevs are present. They cannot > + * safely pin/unpin while vaddrs are being updated. > + */ > + return iommu && !vfio_iommu_has_emulated(iommu); > case VFIO_DMA_CC_IOMMU: > if (!iommu) > return 0; > @@ -3080,6 +3112,11 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu, > size_t offset; > int ret; > > + if (iommu->vaddr_invalid_count) { > + WARN_ONCE(1, "mdev not allowed with VFIO_UPDATE_VADDR\n"); > + return -EIO; > + } Same optimization above, but why are we letting the code iterate this multiple times in the _chunk function rather than testing once in the caller? Thanks, Alex > + > *copied = 0; > > ret = vfio_find_dma_valid(iommu, user_iova, 1, &dma); > diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h > index d7d8e09..4e8d344 100644 > --- a/include/uapi/linux/vfio.h > +++ b/include/uapi/linux/vfio.h > @@ -49,7 +49,11 @@ > /* Supports VFIO_DMA_UNMAP_FLAG_ALL */ > #define VFIO_UNMAP_ALL 9 > > -/* Supports the vaddr flag for DMA map and unmap */ > +/* > + * Supports the vaddr flag for DMA map and unmap. Not supported for mediated > + * devices, so this capability is subject to change as groups are added or > + * removed. > + */ > #define VFIO_UPDATE_VADDR 10 > > /* > @@ -1215,8 +1219,7 @@ struct vfio_iommu_type1_info_dma_avail { > * Map process virtual addresses to IO virtual addresses using the > * provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required. > * > - * If flags & VFIO_DMA_MAP_FLAG_VADDR, update the base vaddr for iova, and > - * unblock translation of host virtual addresses in the iova range. The vaddr > + * If flags & VFIO_DMA_MAP_FLAG_VADDR, update the base vaddr for iova. The vaddr > * must have previously been invalidated with VFIO_DMA_UNMAP_FLAG_VADDR. To > * maintain memory consistency within the user application, the updated vaddr > * must address the same memory object as originally mapped. Failure to do so > @@ -1267,9 +1270,9 @@ struct vfio_bitmap { > * must be 0. This cannot be combined with the get-dirty-bitmap flag. > * > * If flags & VFIO_DMA_UNMAP_FLAG_VADDR, do not unmap, but invalidate host > - * virtual addresses in the iova range. Tasks that attempt to translate an > - * iova's vaddr will block. DMA to already-mapped pages continues. This > - * cannot be combined with the get-dirty-bitmap flag. > + * virtual addresses in the iova range. DMA to already-mapped pages continues. > + * Groups may not be added to the container while any addresses are invalid. > + * This cannot be combined with the get-dirty-bitmap flag. > */ > struct vfio_iommu_type1_dma_unmap { > __u32 argsz;