On 12/13/2022 3:22 PM, Alex Williamson wrote: > On Tue, 13 Dec 2022 11:40:55 -0800 > Steve Sistare <steven.sistare@xxxxxxxxxx> wrote: > >> Disable the VFIO_UPDATE_VADDR capability if mediated devices are present. >> Their kernel threads could be blocked indefinitely by a misbehaving >> userland while trying to pin/unpin pages while vaddrs are being updated. >> >> Do not allow groups to be added to the container while vaddr's are invalid, >> so we never need to block user threads from pinning, and can delete the >> vaddr-waiting code in a subsequent patch. >> > > > Fixes: c3cbab24db38 ("vfio/type1: implement interfaces to update vaddr") will do in both patches, slipped through the cracks. >> Signed-off-by: Steve Sistare <steven.sistare@xxxxxxxxxx> >> --- >> drivers/vfio/vfio_iommu_type1.c | 31 ++++++++++++++++++++++++++++++- >> include/uapi/linux/vfio.h | 15 +++++++++------ >> 2 files changed, 39 insertions(+), 7 deletions(-) >> >> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c >> index 23c24fe..80bdb4d 100644 >> --- a/drivers/vfio/vfio_iommu_type1.c >> +++ b/drivers/vfio/vfio_iommu_type1.c >> @@ -859,6 +859,8 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, >> if (!iommu->v2) >> return -EACCES; >> >> + WARN_ON(iommu->vaddr_invalid_count); >> + > > I'd expect this to abort and return -errno rather than simply trigger a > warning. I added the three WARN_ON's at your request, but they should never fire because we exclude mdevs. I prefer not to bloat the code with additional checking that never fires, and I would prefer to just delete WARN_ON, but its your call. >> mutex_lock(&iommu->lock); >> >> /* >> @@ -976,6 +978,8 @@ static void vfio_iommu_type1_unpin_pages(void *iommu_data, >> >> mutex_lock(&iommu->lock); >> >> + WARN_ON(iommu->vaddr_invalid_count); >> + > > This should never happen or else I'd suggest this also make an early > exit. I would like to delete the WARN_ON's entirely. >> do_accounting = list_empty(&iommu->domain_list); >> for (i = 0; i < npage; i++) { >> dma_addr_t iova = user_iova + PAGE_SIZE * i; >> @@ -1343,6 +1347,10 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, >> >> mutex_lock(&iommu->lock); >> >> + /* Cannot update vaddr if mdev is present. */ >> + if (invalidate_vaddr && !list_empty(&iommu->emulated_iommu_groups)) >> + goto unlock; > > A different errno here to reflect that the container state is the issue > might be appropriate here. Will do. >> + >> pgshift = __ffs(iommu->pgsize_bitmap); >> pgsize = (size_t)1 << pgshift; >> >> @@ -2189,6 +2197,10 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, >> >> mutex_lock(&iommu->lock); >> >> + /* Attach could require pinning, so disallow while vaddr is invalid. */ >> + if (iommu->vaddr_invalid_count) >> + goto out_unlock; >> + >> /* Check for duplicates */ >> if (vfio_iommu_find_iommu_group(iommu, iommu_group)) >> goto out_unlock; >> @@ -2660,6 +2672,16 @@ static int vfio_domains_have_enforce_cache_coherency(struct vfio_iommu *iommu) >> return ret; >> } >> >> +static int vfio_iommu_has_emulated(struct vfio_iommu *iommu) >> +{ >> + int ret; >> + >> + mutex_lock(&iommu->lock); >> + ret = !list_empty(&iommu->emulated_iommu_groups); >> + mutex_unlock(&iommu->lock); >> + return ret; >> +} >> + >> static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu, >> unsigned long arg) >> { >> @@ -2668,8 +2690,13 @@ static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu, >> case VFIO_TYPE1v2_IOMMU: >> case VFIO_TYPE1_NESTING_IOMMU: >> case VFIO_UNMAP_ALL: >> - case VFIO_UPDATE_VADDR: >> return 1; >> + case VFIO_UPDATE_VADDR: >> + /* >> + * Disable this feature if mdevs are present. They cannot >> + * safely pin/unpin while vaddrs are being updated. >> + */ >> + return iommu && !vfio_iommu_has_emulated(iommu); >> case VFIO_DMA_CC_IOMMU: >> if (!iommu) >> return 0; >> @@ -3080,6 +3107,8 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu, >> size_t offset; >> int ret; >> >> + WARN_ON(iommu->vaddr_invalid_count); >> + > > Same as pinning, this should trigger -errno. Thanks, Another one that should never happen. - Steve >> *copied = 0; >> >> ret = vfio_find_dma_valid(iommu, user_iova, 1, &dma); >> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h >> index d7d8e09..4e8d344 100644 >> --- a/include/uapi/linux/vfio.h >> +++ b/include/uapi/linux/vfio.h >> @@ -49,7 +49,11 @@ >> /* Supports VFIO_DMA_UNMAP_FLAG_ALL */ >> #define VFIO_UNMAP_ALL 9 >> >> -/* Supports the vaddr flag for DMA map and unmap */ >> +/* >> + * Supports the vaddr flag for DMA map and unmap. Not supported for mediated >> + * devices, so this capability is subject to change as groups are added or >> + * removed. >> + */ >> #define VFIO_UPDATE_VADDR 10 >> >> /* >> @@ -1215,8 +1219,7 @@ struct vfio_iommu_type1_info_dma_avail { >> * Map process virtual addresses to IO virtual addresses using the >> * provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required. >> * >> - * If flags & VFIO_DMA_MAP_FLAG_VADDR, update the base vaddr for iova, and >> - * unblock translation of host virtual addresses in the iova range. The vaddr >> + * If flags & VFIO_DMA_MAP_FLAG_VADDR, update the base vaddr for iova. The vaddr >> * must have previously been invalidated with VFIO_DMA_UNMAP_FLAG_VADDR. To >> * maintain memory consistency within the user application, the updated vaddr >> * must address the same memory object as originally mapped. Failure to do so >> @@ -1267,9 +1270,9 @@ struct vfio_bitmap { >> * must be 0. This cannot be combined with the get-dirty-bitmap flag. >> * >> * If flags & VFIO_DMA_UNMAP_FLAG_VADDR, do not unmap, but invalidate host >> - * virtual addresses in the iova range. Tasks that attempt to translate an >> - * iova's vaddr will block. DMA to already-mapped pages continues. This >> - * cannot be combined with the get-dirty-bitmap flag. >> + * virtual addresses in the iova range. DMA to already-mapped pages continues. >> + * Groups may not be added to the container while any addresses are invalid. >> + * This cannot be combined with the get-dirty-bitmap flag. >> */ >> struct vfio_iommu_type1_dma_unmap { >> __u32 argsz; >