On Wed, Jan 27, 2021 at 11:51 AM Jason Wang <jasowang@xxxxxxxxxx> wrote: > > > On 2021/1/20 下午3:52, Yongji Xie wrote: > > On Wed, Jan 20, 2021 at 2:24 PM Jason Wang <jasowang@xxxxxxxxxx> wrote: > >> > >> On 2021/1/19 下午12:59, Xie Yongji wrote: > >>> Add an opaque pointer for vhost IOTLB to store the > >>> corresponding vma->vm_file and offset on the DMA mapping. > >> > >> Let's split the patch into two. > >> > >> 1) opaque pointer > >> 2) vma stuffs > >> > > OK. > > > >>> It will be used in VDUSE case later. > >>> > >>> Suggested-by: Jason Wang <jasowang@xxxxxxxxxx> > >>> Signed-off-by: Xie Yongji <xieyongji@xxxxxxxxxxxxx> > >>> --- > >>> drivers/vdpa/vdpa_sim/vdpa_sim.c | 11 ++++--- > >>> drivers/vhost/iotlb.c | 5 ++- > >>> drivers/vhost/vdpa.c | 66 +++++++++++++++++++++++++++++++++++----- > >>> drivers/vhost/vhost.c | 4 +-- > >>> include/linux/vdpa.h | 3 +- > >>> include/linux/vhost_iotlb.h | 8 ++++- > >>> 6 files changed, 79 insertions(+), 18 deletions(-) > >>> > >>> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c > >>> index 03c796873a6b..1ffcef67954f 100644 > >>> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c > >>> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c > >>> @@ -279,7 +279,7 @@ static dma_addr_t vdpasim_map_page(struct device *dev, struct page *page, > >>> */ > >>> spin_lock(&vdpasim->iommu_lock); > >>> ret = vhost_iotlb_add_range(iommu, pa, pa + size - 1, > >>> - pa, dir_to_perm(dir)); > >>> + pa, dir_to_perm(dir), NULL); > >> > >> Maybe its better to introduce > >> > >> vhost_iotlb_add_range_ctx() which can accepts the opaque (context). And > >> let vhost_iotlb_add_range() just call that. > >> > > If so, we need export both vhost_iotlb_add_range() and > > vhost_iotlb_add_range_ctx() which will be used in VDUSE driver. Is it > > a bit redundant? 
> > > Probably not, we do something similar in virtio core: > > void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len, > void **ctx) > { > struct vring_virtqueue *vq = to_vvq(_vq); > > return vq->packed_ring ? virtqueue_get_buf_ctx_packed(_vq, len, ctx) : > virtqueue_get_buf_ctx_split(_vq, len, ctx); > } > EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx); > > void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) > { > return virtqueue_get_buf_ctx(_vq, len, NULL); > } > EXPORT_SYMBOL_GPL(virtqueue_get_buf); > I see. Will do it in the next version. > > > > >>> spin_unlock(&vdpasim->iommu_lock); > >>> if (ret) > >>> return DMA_MAPPING_ERROR; > >>> @@ -317,7 +317,7 @@ static void *vdpasim_alloc_coherent(struct device *dev, size_t size, > >>> > >>> ret = vhost_iotlb_add_range(iommu, (u64)pa, > >>> (u64)pa + size - 1, > >>> - pa, VHOST_MAP_RW); > >>> + pa, VHOST_MAP_RW, NULL); > >>> if (ret) { > >>> *dma_addr = DMA_MAPPING_ERROR; > >>> kfree(addr); > >>> @@ -625,7 +625,8 @@ static int vdpasim_set_map(struct vdpa_device *vdpa, > >>> for (map = vhost_iotlb_itree_first(iotlb, start, last); map; > >>> map = vhost_iotlb_itree_next(map, start, last)) { > >>> ret = vhost_iotlb_add_range(vdpasim->iommu, map->start, > >>> - map->last, map->addr, map->perm); > >>> + map->last, map->addr, > >>> + map->perm, NULL); > >>> if (ret) > >>> goto err; > >>> } > >>> @@ -639,14 +640,14 @@ static int vdpasim_set_map(struct vdpa_device *vdpa, > >>> } > >>> > >>> static int vdpasim_dma_map(struct vdpa_device *vdpa, u64 iova, u64 size, > >>> - u64 pa, u32 perm) > >>> + u64 pa, u32 perm, void *opaque) > >>> { > >>> struct vdpasim *vdpasim = vdpa_to_sim(vdpa); > >>> int ret; > >>> > >>> spin_lock(&vdpasim->iommu_lock); > >>> ret = vhost_iotlb_add_range(vdpasim->iommu, iova, iova + size - 1, pa, > >>> - perm); > >>> + perm, NULL); > >>> spin_unlock(&vdpasim->iommu_lock); > >>> > >>> return ret; > >>> diff --git a/drivers/vhost/iotlb.c b/drivers/vhost/iotlb.c > >>> index 
0fd3f87e913c..3bd5bd06cdbc 100644 > >>> --- a/drivers/vhost/iotlb.c > >>> +++ b/drivers/vhost/iotlb.c > >>> @@ -42,13 +42,15 @@ EXPORT_SYMBOL_GPL(vhost_iotlb_map_free); > >>> * @last: last of IOVA range > >>> * @addr: the address that is mapped to @start > >>> * @perm: access permission of this range > >>> + * @opaque: the opaque pointer for the IOTLB mapping > >>> * > >>> * Returns an error last is smaller than start or memory allocation > >>> * fails > >>> */ > >>> int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, > >>> u64 start, u64 last, > >>> - u64 addr, unsigned int perm) > >>> + u64 addr, unsigned int perm, > >>> + void *opaque) > >>> { > >>> struct vhost_iotlb_map *map; > >>> > >>> @@ -71,6 +73,7 @@ int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, > >>> map->last = last; > >>> map->addr = addr; > >>> map->perm = perm; > >>> + map->opaque = opaque; > >>> > >>> iotlb->nmaps++; > >>> vhost_iotlb_itree_insert(map, &iotlb->root); > >>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c > >>> index 36b6950ba37f..e83e5be7cec8 100644 > >>> --- a/drivers/vhost/vdpa.c > >>> +++ b/drivers/vhost/vdpa.c > >>> @@ -488,6 +488,7 @@ static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last) > >>> struct vhost_dev *dev = &v->vdev; > >>> struct vdpa_device *vdpa = v->vdpa; > >>> struct vhost_iotlb *iotlb = dev->iotlb; > >>> + struct vhost_iotlb_file *iotlb_file; > >>> struct vhost_iotlb_map *map; > >>> struct page *page; > >>> unsigned long pfn, pinned; > >>> @@ -504,6 +505,10 @@ static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last) > >>> } > >>> atomic64_sub(map->size >> PAGE_SHIFT, > >>> &dev->mm->pinned_vm); > >>> + } else if (map->opaque) { > >>> + iotlb_file = (struct vhost_iotlb_file *)map->opaque; > >>> + fput(iotlb_file->file); > >>> + kfree(iotlb_file); > >>> } > >>> vhost_iotlb_map_free(iotlb, map); > >>> } > >>> @@ -540,8 +545,8 @@ static int perm_to_iommu_flags(u32 perm) > >>> return flags | IOMMU_CACHE; > 
>>> } > >>> > >>> -static int vhost_vdpa_map(struct vhost_vdpa *v, > >>> - u64 iova, u64 size, u64 pa, u32 perm) > >>> +static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova, > >>> + u64 size, u64 pa, u32 perm, void *opaque) > >>> { > >>> struct vhost_dev *dev = &v->vdev; > >>> struct vdpa_device *vdpa = v->vdpa; > >>> @@ -549,12 +554,12 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, > >>> int r = 0; > >>> > >>> r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1, > >>> - pa, perm); > >>> + pa, perm, opaque); > >>> if (r) > >>> return r; > >>> > >>> if (ops->dma_map) { > >>> - r = ops->dma_map(vdpa, iova, size, pa, perm); > >>> + r = ops->dma_map(vdpa, iova, size, pa, perm, opaque); > >>> } else if (ops->set_map) { > >>> if (!v->in_batch) > >>> r = ops->set_map(vdpa, dev->iotlb); > >>> @@ -591,6 +596,51 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size) > >>> } > >>> } > >>> > >>> +static int vhost_vdpa_sva_map(struct vhost_vdpa *v, > >>> + u64 iova, u64 size, u64 uaddr, u32 perm) > >>> +{ > >>> + u64 offset, map_size, map_iova = iova; > >>> + struct vhost_iotlb_file *iotlb_file; > >>> + struct vm_area_struct *vma; > >>> + int ret; > >> > >> Lacking mmap_read_lock(). > >> > > Good catch! Will fix it. > > > >>> + > >>> + while (size) { > >>> + vma = find_vma(current->mm, uaddr); > >>> + if (!vma) { > >>> + ret = -EINVAL; > >>> + goto err; > >>> + } > >>> + map_size = min(size, vma->vm_end - uaddr); > >>> + offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start; > >>> + iotlb_file = NULL; > >>> + if (vma->vm_file && (vma->vm_flags & VM_SHARED)) { > >> > >> I wonder if we need more strict check here. When developing vhost-vdpa, > >> I try hard to make sure the map can only work for user pages. > >> > >> So the question is: do we need to exclude MMIO area or only allow shmem > >> to work here? > >> > > Do you mean we need to check VM_MIXEDMAP | VM_PFNMAP here? > > > I meant do we need to allow VM_IO here? 
(We don't allow such case in > vhost-vdpa now). > OK, let's exclude the vma with VM_IO | VM_PFNMAP. > > > > It makes sense to me. > > > >> > >>> + iotlb_file = kmalloc(sizeof(*iotlb_file), GFP_KERNEL); > >>> + if (!iotlb_file) { > >>> + ret = -ENOMEM; > >>> + goto err; > >>> + } > >>> + iotlb_file->file = get_file(vma->vm_file); > >>> + iotlb_file->offset = offset; > >>> + } > >> > >> I wonder if it's better to allocate iotlb_file and make iotlb_file->file > >> = NULL && iotlb_file->offset = 0. This can force a consistent code for > >> the vDPA parents. > >> > > Looks fine to me. > > > >> Or we can simply fail the map without a file as backend. > >> > > Actually there will be some vma without vm_file during vm booting. > > > Yes, e.g bios or other rom. Vhost-user has the similar issue and they > filter the out them in qemu. > > For vhost-vDPA, consider it can supports various difference backends, we > can't do that. > OK, I will pass the iotlb_file with a NULL file and let the backend do the filtering. Thanks, Yongji