On 1/29/2021 4:56 PM, Alex Williamson wrote: > On Fri, 29 Jan 2021 08:54:09 -0800 > Steve Sistare <steven.sistare@xxxxxxxxxx> wrote: > >> Implement VFIO_DMA_UNMAP_FLAG_VADDR, VFIO_DMA_MAP_FLAG_VADDR, and >> VFIO_UPDATE_VADDR. This is a partial implementation. Blocking is >> added in a subsequent patch. >> >> Signed-off-by: Steve Sistare <steven.sistare@xxxxxxxxxx> >> --- >> drivers/vfio/vfio_iommu_type1.c | 62 +++++++++++++++++++++++++++++++++++++---- >> 1 file changed, 56 insertions(+), 6 deletions(-) >> >> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c >> index 5823607..cbe3c65 100644 >> --- a/drivers/vfio/vfio_iommu_type1.c >> +++ b/drivers/vfio/vfio_iommu_type1.c >> @@ -74,6 +74,7 @@ struct vfio_iommu { >> bool nesting; >> bool dirty_page_tracking; >> bool pinned_page_dirty_scope; >> + int vaddr_invalid_count; > > Nit, I'll move this up in the struct for alignment/hole purposes. OK. I noticed the hole, but I also noticed that the booleans were not declared in the hole, so I thought perhaps you cared about maintaining binary compatibility. > Also > I think this should be an unsigned, we never use the negative value for > sanity checking and our number of vfio_dma entries is governed by an > unsigned int. Agreed. >> }; >> >> struct vfio_domain { >> @@ -92,6 +93,7 @@ struct vfio_dma { >> int prot; /* IOMMU_READ/WRITE */ >> bool iommu_mapped; >> bool lock_cap; /* capable(CAP_IPC_LOCK) */ >> + bool vaddr_invalid; >> struct task_struct *task; >> struct rb_root pfn_list; /* Ex-user pinned pfn list */ >> unsigned long *bitmap; >> @@ -973,6 +975,8 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma) >> vfio_unlink_dma(iommu, dma); >> put_task_struct(dma->task); >> vfio_dma_bitmap_free(dma); >> + if (dma->vaddr_invalid) >> + --iommu->vaddr_invalid_count; > > I'm not a fan of the -- on the left, nor the mixing with ++ on the > right. Can I move the -- to the right? Yes please, same here! I saw a lot of --prefix usage in the kernel so I made a guess that it was a preferred style. >> kfree(dma); >> iommu->dma_avail++; >> } >> @@ -1103,7 +1107,8 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, >> dma_addr_t iova = unmap->iova; >> unsigned long size = unmap->size; >> bool unmap_all = !!(unmap->flags & VFIO_DMA_UNMAP_FLAG_ALL); >> - struct rb_node *n; >> + bool invalidate_vaddr = !!(unmap->flags & VFIO_DMA_UNMAP_FLAG_VADDR); > > Nit, !! is not necessary and inconsistent with the MAP side below, will > remove here and for previous unmap_all use. Yes please. I was not sure if the community was OK with variables declared bool taking values other than 0 or 1. And I missed the !! in map. >> + struct rb_node *n, *first_n, *last_n; >> >> mutex_lock(&iommu->lock); >> >> @@ -1174,7 +1179,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, >> } >> >> ret = 0; >> - n = vfio_find_dma_first(iommu, iova, size); >> + n = first_n = vfio_find_dma_first(iommu, iova, size); >> >> while (n) { >> dma = rb_entry(n, struct vfio_dma, node); >> @@ -1190,6 +1195,16 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, >> if (dma->task->mm != current->mm) >> break; >> >> + if (invalidate_vaddr) { >> + if (dma->vaddr_invalid) >> + goto unwind; > > The unwind actually fits pretty well inline here and avoids the leap > frogging gotos below. last_n can also be scoped within the unwind more > easily here. Sure, that keeps all the vaddr code in one place. >> + dma->vaddr_invalid = true; >> + iommu->vaddr_invalid_count++; >> + unmapped += dma->size; >> + n = rb_next(n); >> + continue; >> + } >> + >> if (!RB_EMPTY_ROOT(&dma->pfn_list)) { >> struct vfio_iommu_type1_dma_unmap nb_unmap; >> >> @@ -1228,6 +1243,18 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, >> n = rb_next(n); >> vfio_remove_dma(iommu, dma); >> } >> + goto unlock; >> + >> +unwind: >> + last_n = n; >> + >> + for (n = first_n; n != last_n; n = rb_next(n)) { >> + dma = rb_entry(n, struct vfio_dma, node); >> + dma->vaddr_invalid = false; >> + --iommu->vaddr_invalid_count; >> + } >> + ret = -EINVAL; >> + unmapped = 0; >> >> unlock: >> mutex_unlock(&iommu->lock); >> @@ -1329,6 +1356,7 @@ static bool vfio_iommu_iova_dma_valid(struct vfio_iommu *iommu, >> static int vfio_dma_do_map(struct vfio_iommu *iommu, >> struct vfio_iommu_type1_dma_map *map) >> { >> + bool set_vaddr = map->flags & VFIO_DMA_MAP_FLAG_VADDR; >> dma_addr_t iova = map->iova; >> unsigned long vaddr = map->vaddr; >> size_t size = map->size; >> @@ -1346,13 +1374,16 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, >> if (map->flags & VFIO_DMA_MAP_FLAG_READ) >> prot |= IOMMU_READ; >> >> + if ((prot && set_vaddr) || (!prot && !set_vaddr)) >> + return -EINVAL; >> + >> mutex_lock(&iommu->lock); >> >> pgsize = (size_t)1 << __ffs(iommu->pgsize_bitmap); >> >> WARN_ON((pgsize - 1) & PAGE_MASK); >> >> - if (!prot || !size || (size | iova | vaddr) & (pgsize - 1)) { >> + if (!size || (size | iova | vaddr) & (pgsize - 1)) { >> ret = -EINVAL; >> goto out_unlock; >> } >> @@ -1363,7 +1394,20 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, >> goto out_unlock; >> } >> >> - if (vfio_find_dma(iommu, iova, size)) { >> + dma = vfio_find_dma(iommu, iova, size); >> + if (set_vaddr) { >> + if (!dma) { >> + ret = -ENOENT; >> + } else if (!dma->vaddr_invalid || dma->iova != iova || >> + dma->size != size) { >> + ret = -EINVAL; >> + } else { >> + dma->vaddr = vaddr; >> + dma->vaddr_invalid = false; >> + --iommu->vaddr_invalid_count; >> + } >> + goto out_unlock; >> + } else if (dma) { >> ret = -EEXIST; >> goto out_unlock; >> } >> @@ -1387,6 +1431,7 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, >> iommu->dma_avail--; >> dma->iova = iova; >> dma->vaddr = vaddr; >> + dma->vaddr_invalid = false; >> dma->prot = prot; >> >> /* >> @@ -2483,6 +2528,7 @@ static void *vfio_iommu_type1_open(unsigned long arg) >> INIT_LIST_HEAD(&iommu->iova_list); >> iommu->dma_list = RB_ROOT; >> iommu->dma_avail = dma_entry_limit; >> + iommu->vaddr_invalid_count = 0; > > Nit, we can drop this. OK. - Steve >> mutex_init(&iommu->lock); >> BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier); >> >> @@ -2555,6 +2601,7 @@ static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu, >> case VFIO_TYPE1v2_IOMMU: >> case VFIO_TYPE1_NESTING_IOMMU: >> case VFIO_UNMAP_ALL: >> + case VFIO_UPDATE_VADDR: >> return 1; >> case VFIO_DMA_CC_IOMMU: >> if (!iommu) >> @@ -2709,7 +2756,8 @@ static int vfio_iommu_type1_map_dma(struct vfio_iommu *iommu, >> { >> struct vfio_iommu_type1_dma_map map; >> unsigned long minsz; >> - uint32_t mask = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; >> + uint32_t mask = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE | >> + VFIO_DMA_MAP_FLAG_VADDR; >> >> minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); >> >> @@ -2728,6 +2776,7 @@ static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu, >> struct vfio_iommu_type1_dma_unmap unmap; >> struct vfio_bitmap bitmap = { 0 }; >> uint32_t mask = VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP | >> + VFIO_DMA_UNMAP_FLAG_VADDR | >> VFIO_DMA_UNMAP_FLAG_ALL; >> unsigned long minsz; >> int ret; >> @@ -2741,7 +2790,8 @@ static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu, >> return -EINVAL; >> >> if ((unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) && >> - (unmap.flags & VFIO_DMA_UNMAP_FLAG_ALL)) >> + (unmap.flags & (VFIO_DMA_UNMAP_FLAG_ALL | >> + VFIO_DMA_UNMAP_FLAG_VADDR))) >> return -EINVAL; >> >> if (unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) { >