On 1/22/2021 4:48 PM, Alex Williamson wrote: > On Tue, 19 Jan 2021 09:48:26 -0800 > Steve Sistare <steven.sistare@xxxxxxxxxx> wrote: > >> Implement VFIO_DMA_UNMAP_FLAG_VADDR, VFIO_DMA_MAP_FLAG_VADDR, and >> VFIO_UPDATE_VADDR. This is a partial implementation. Blocking is >> added in the next patch. >> >> Signed-off-by: Steve Sistare <steven.sistare@xxxxxxxxxx> >> --- >> drivers/vfio/vfio_iommu_type1.c | 54 ++++++++++++++++++++++++++++++++++++++--- >> 1 file changed, 51 insertions(+), 3 deletions(-) >> >> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c >> index ef83018..c307f62 100644 >> --- a/drivers/vfio/vfio_iommu_type1.c >> +++ b/drivers/vfio/vfio_iommu_type1.c >> @@ -92,6 +92,7 @@ struct vfio_dma { >> int prot; /* IOMMU_READ/WRITE */ >> bool iommu_mapped; >> bool lock_cap; /* capable(CAP_IPC_LOCK) */ >> + bool vaddr_valid; >> struct task_struct *task; >> struct rb_root pfn_list; /* Ex-user pinned pfn list */ >> unsigned long *bitmap; >> @@ -1101,6 +1102,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, >> dma_addr_t iova = unmap->iova; >> unsigned long size = unmap->size; >> bool unmap_all = !!(unmap->flags & VFIO_DMA_UNMAP_FLAG_ALL); >> + bool invalidate = !!(unmap->flags & VFIO_DMA_UNMAP_FLAG_VADDR); >> >> mutex_lock(&iommu->lock); >> >> @@ -1181,6 +1183,18 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, >> if (dma->task->mm != current->mm) >> break; >> >> + if (invalidate) { >> + if (!dma->vaddr_valid) >> + goto unwind; >> + dma->vaddr_valid = false; >> + unmapped += dma->size; >> + size -= dma->iova + dma->size - iova; >> + iova = dma->iova + dma->size; >> + if (iova == 0) >> + break; >> + continue; >> + } > > Clearly this is where the change to find-first semantics should have > been introduced, the previous patch didn't require it. Would it really > be that unsightly to do: > > while (1) { > if (unlikely(invalidate)) > dma = vfio_find_dma_first(... > else > dma = vfio_find_dma(... > > if (!dma) > break; > > Find-first followed by a tree walk might be more efficient, but of > course requires a full loop redesign. OK, I'll bite. How about: n = vfio_find_dma_first(iommu, iova, size); while (n) { dma = rb_entry(n, struct vfio_dma, node); ... n = rb_next(n); vfio_remove_dma(iommu, dma); } plus similar changes in unwind. This costs logN + N, eg O(N). For unmap-all, I could do find-any in constant time instead of logN and suppress the rb_next, but the code would be messier and the overall cost would still be O(N), so it does not seem worth it. >> + >> if (!RB_EMPTY_ROOT(&dma->pfn_list)) { >> struct vfio_iommu_type1_dma_unmap nb_unmap; >> >> @@ -1218,6 +1232,20 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, >> unmapped += dma->size; >> vfio_remove_dma(iommu, dma); >> } >> + goto unlock; >> + >> +unwind: >> + iova = unmap->iova; >> + size = unmap_all ? SIZE_MAX : unmap->size; >> + dma_last = dma; >> + while ((dma = vfio_find_dma_first(iommu, iova, size)) && >> + dma != dma_last) { >> + dma->vaddr_valid = true; >> + size -= dma->iova + dma->size - iova; >> + iova = dma->iova + dma->size; >> + } >> + ret = -EINVAL; >> + unmapped = 0; >> >> unlock: >> mutex_unlock(&iommu->lock); >> @@ -1319,6 +1347,7 @@ static bool vfio_iommu_iova_dma_valid(struct vfio_iommu *iommu, >> static int vfio_dma_do_map(struct vfio_iommu *iommu, >> struct vfio_iommu_type1_dma_map *map) >> { >> + bool update = map->flags & VFIO_DMA_MAP_FLAG_VADDR; > > Please pick a slightly more specific variable name, update_vaddr maybe. > Perhaps even clear_vaddr rather than invalidate above for consistency. OK. >> dma_addr_t iova = map->iova; >> unsigned long vaddr = map->vaddr; >> size_t size = map->size; >> @@ -1336,13 +1365,16 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, >> if (map->flags & VFIO_DMA_MAP_FLAG_READ) >> prot |= IOMMU_READ; >> >> + if ((prot && update) || (!prot && !update)) >> + return -EINVAL; >> + >> mutex_lock(&iommu->lock); >> >> pgsize = (size_t)1 << __ffs(iommu->pgsize_bitmap); >> >> WARN_ON((pgsize - 1) & PAGE_MASK); >> >> - if (!prot || !size || (size | iova | vaddr) & (pgsize - 1)) { >> + if (!size || (size | iova | vaddr) & (pgsize - 1)) { >> ret = -EINVAL; >> goto out_unlock; >> } >> @@ -1353,7 +1385,19 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, >> goto out_unlock; >> } >> >> - if (vfio_find_dma(iommu, iova, size)) { >> + dma = vfio_find_dma(iommu, iova, size); >> + if (update) { >> + if (!dma) { >> + ret = -ENOENT; >> + } else if (dma->vaddr_valid || dma->iova != iova || >> + dma->size != size) { >> + ret = -EINVAL; >> + } else { >> + dma->vaddr = vaddr; >> + dma->vaddr_valid = true; >> + } >> + goto out_unlock; >> + } else if (dma) { >> ret = -EEXIST; >> goto out_unlock; >> } >> @@ -1377,6 +1421,7 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, >> iommu->dma_avail--; >> dma->iova = iova; >> dma->vaddr = vaddr; >> + dma->vaddr_valid = true; >> dma->prot = prot; >> >> /* >> @@ -2545,6 +2590,7 @@ static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu, >> case VFIO_TYPE1v2_IOMMU: >> case VFIO_TYPE1_NESTING_IOMMU: >> case VFIO_UNMAP_ALL: >> + case VFIO_UPDATE_VADDR: >> return 1; >> case VFIO_DMA_CC_IOMMU: >> if (!iommu) >> @@ -2699,7 +2745,8 @@ static int vfio_iommu_type1_map_dma(struct vfio_iommu *iommu, >> { >> struct vfio_iommu_type1_dma_map map; >> unsigned long minsz; >> - uint32_t mask = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; >> + uint32_t mask = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE | >> + VFIO_DMA_MAP_FLAG_VADDR; >> >> minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); >> >> @@ -2718,6 +2765,7 @@ static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu, >> struct vfio_iommu_type1_dma_unmap unmap; >> struct vfio_bitmap bitmap = { 0 }; >> uint32_t mask = VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP | >> + VFIO_DMA_UNMAP_FLAG_VADDR | >> VFIO_DMA_UNMAP_FLAG_ALL; > > dirty + vaddr would need a separate test per my previous patch comment. > Thanks, Yes. - Steve