Re: [RFC v4 05/11] vdpa: Support transferring virtual addressing during DMA mapping

On 23.2.2021 13.50, Xie Yongji wrote:
This patch introduces an attribute for vDPA devices to indicate
whether virtual addresses can be used. If the vDPA device driver
sets it, the vhost-vdpa bus driver will not pin user pages and
will pass the userspace virtual address instead of the physical
address during DMA mapping. The corresponding vma->vm_file and
offset will also be passed as an opaque pointer.

In the virtual addressing case, who is then responsible for pinning, or even mapping, physical pages to the vaddr?
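
For reference, the opt-in happens at allocation time via the new
vdpa_alloc_device() parameter below; a parent driver that does its own
translation would do something like this (the my_* names are made up):

	/* hypothetical parent driver with an on-chip IOMMU */
	adapter = vdpa_alloc_device(struct my_adapter, vdpa, dev,
				    &my_vdpa_ops, nvqs, NULL, true);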


Suggested-by: Jason Wang <jasowang@xxxxxxxxxx>
Signed-off-by: Xie Yongji <xieyongji@xxxxxxxxxxxxx>
---
  drivers/vdpa/ifcvf/ifcvf_main.c   |   2 +-
  drivers/vdpa/mlx5/net/mlx5_vnet.c |   2 +-
  drivers/vdpa/vdpa.c               |   9 +++-
  drivers/vdpa/vdpa_sim/vdpa_sim.c  |   2 +-
  drivers/vhost/vdpa.c              | 104 +++++++++++++++++++++++++++++++-------
  include/linux/vdpa.h              |  20 ++++++--
  6 files changed, 113 insertions(+), 26 deletions(-)

diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index 7c8bbfcf6c3e..228b9f920fea 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -432,7 +432,7 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)

  	adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
  				    dev, &ifc_vdpa_ops,
-				    IFCVF_MAX_QUEUE_PAIRS * 2, NULL);
+				    IFCVF_MAX_QUEUE_PAIRS * 2, NULL, false);
  	if (adapter == NULL) {
  		IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
  		return -ENOMEM;
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 029822060017..54290438da28 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -1964,7 +1964,7 @@ static int mlx5v_probe(struct auxiliary_device *adev,
  	max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);

  	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
-				 2 * mlx5_vdpa_max_qps(max_vqs), NULL);
+				 2 * mlx5_vdpa_max_qps(max_vqs), NULL, false);
  	if (IS_ERR(ndev))
  		return PTR_ERR(ndev);
diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
index 9700a0adcca0..fafc0ee5eb05 100644
--- a/drivers/vdpa/vdpa.c
+++ b/drivers/vdpa/vdpa.c
@@ -72,6 +72,7 @@ static void vdpa_release_dev(struct device *d)
   * @nvqs: number of virtqueues supported by this device
   * @size: size of the parent structure that contains private data
   * @name: name of the vdpa device; optional.
+ * @use_va: indicate whether virtual address can be used by this device
   *
   * Driver should use vdpa_alloc_device() wrapper macro instead of
   * using this directly.
@@ -81,7 +82,8 @@ static void vdpa_release_dev(struct device *d)
   */
  struct vdpa_device *__vdpa_alloc_device(struct device *parent,
  					const struct vdpa_config_ops *config,
-					int nvqs, size_t size, const char *name)
+					int nvqs, size_t size, const char *name,
+					bool use_va)
  {
  	struct vdpa_device *vdev;
  	int err = -EINVAL;
@@ -92,6 +94,10 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
  	if (!!config->dma_map != !!config->dma_unmap)
  		goto err;
+	/* It should only work for devices that use an on-chip IOMMU */
+	if (use_va && !(config->dma_map || config->set_map))
+		goto err;
+
  	err = -ENOMEM;
  	vdev = kzalloc(size, GFP_KERNEL);
  	if (!vdev)
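
(So a use_va parent is expected to translate by itself: the core now
rejects use_va unless the driver provides .dma_map or .set_map. A
hypothetical ops table just to illustrate:

	static const struct vdpa_config_ops my_vdpa_ops = {
		/* ... virtqueue and config ops ... */
		.dma_map   = my_dma_map,	/* gets vaddr + opaque cookie */
		.dma_unmap = my_dma_unmap,
	};
)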
@@ -108,6 +114,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
  	vdev->config = config;
  	vdev->features_valid = false;
  	vdev->nvqs = nvqs;
+	vdev->use_va = use_va;

  	if (name)
  		err = dev_set_name(&vdev->dev, "%s", name);
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index 5cfc262ce055..3a9a2dd4e987 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -235,7 +235,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr)
  		ops = &vdpasim_config_ops;

  	vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops,
-				    dev_attr->nvqs, dev_attr->name);
+				    dev_attr->nvqs, dev_attr->name, false);
  	if (!vdpasim)
  		goto err_alloc;
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 70857fe3263c..93769ace34df 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -480,21 +480,31 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
  static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
  {
  	struct vhost_dev *dev = &v->vdev;
+	struct vdpa_device *vdpa = v->vdpa;
  	struct vhost_iotlb *iotlb = dev->iotlb;
  	struct vhost_iotlb_map *map;
+	struct vdpa_map_file *map_file;
  	struct page *page;
  	unsigned long pfn, pinned;

  	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
-		pinned = map->size >> PAGE_SHIFT;
-		for (pfn = map->addr >> PAGE_SHIFT;
-		     pinned > 0; pfn++, pinned--) {
-			page = pfn_to_page(pfn);
-			if (map->perm & VHOST_ACCESS_WO)
-				set_page_dirty_lock(page);
-			unpin_user_page(page);
+		if (!vdpa->use_va) {
+			pinned = map->size >> PAGE_SHIFT;
+			for (pfn = map->addr >> PAGE_SHIFT;
+			     pinned > 0; pfn++, pinned--) {
+				page = pfn_to_page(pfn);
+				if (map->perm & VHOST_ACCESS_WO)
+					set_page_dirty_lock(page);
+				unpin_user_page(page);
+			}
+			atomic64_sub(map->size >> PAGE_SHIFT,
+					&dev->mm->pinned_vm);
+		} else {
+			map_file = (struct vdpa_map_file *)map->opaque;
+			if (map_file->file)
+				fput(map_file->file);
+			kfree(map_file);
  		}
-		atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm);
  		vhost_iotlb_map_free(iotlb, map);
  	}
  }
@@ -530,21 +540,21 @@ static int perm_to_iommu_flags(u32 perm)
  	return flags | IOMMU_CACHE;
  }

-static int vhost_vdpa_map(struct vhost_vdpa *v,
-			  u64 iova, u64 size, u64 pa, u32 perm)
+static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova,
+			  u64 size, u64 pa, u32 perm, void *opaque)
  {
  	struct vhost_dev *dev = &v->vdev;
  	struct vdpa_device *vdpa = v->vdpa;
  	const struct vdpa_config_ops *ops = vdpa->config;
  	int r = 0;

-	r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1,
-				  pa, perm);
+	r = vhost_iotlb_add_range_ctx(dev->iotlb, iova, iova + size - 1,
+				      pa, perm, opaque);
  	if (r)
  		return r;

  	if (ops->dma_map) {
-		r = ops->dma_map(vdpa, iova, size, pa, perm, NULL);
+		r = ops->dma_map(vdpa, iova, size, pa, perm, opaque);
  	} else if (ops->set_map) {
  		if (!v->in_batch)
  			r = ops->set_map(vdpa, dev->iotlb);
@@ -552,13 +562,15 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
  		r = iommu_map(v->domain, iova, pa, size,
  			      perm_to_iommu_flags(perm));
  	}
-
-	if (r)
+	if (r) {
  		vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
-	else
+		return r;
+	}
+
+	if (!vdpa->use_va)
  		atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);

-	return r;
+	return 0;
  }

  static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
@@ -579,10 +591,60 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
  	}
  }
+static int vhost_vdpa_va_map(struct vhost_vdpa *v,
+			     u64 iova, u64 size, u64 uaddr, u32 perm)
+{
+	struct vhost_dev *dev = &v->vdev;
+	u64 offset, map_size, map_iova = iova;
+	struct vdpa_map_file *map_file;
+	struct vm_area_struct *vma;
+	int ret;
+
+	mmap_read_lock(dev->mm);
+
+	while (size) {
+		vma = find_vma(dev->mm, uaddr);
+		if (!vma) {
+			ret = -EINVAL;
+			goto err;
+		}
+		map_size = min(size, vma->vm_end - uaddr);
+		offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start;
+		map_file = kzalloc(sizeof(*map_file), GFP_KERNEL);
+		if (!map_file) {
+			ret = -ENOMEM;
+			goto err;
+		}
+		if (vma->vm_file && (vma->vm_flags & VM_SHARED) &&
+			!(vma->vm_flags & (VM_IO | VM_PFNMAP))) {
+			map_file->file = get_file(vma->vm_file);
+			map_file->offset = offset;
+		}
+		ret = vhost_vdpa_map(v, map_iova, map_size, uaddr,
+				     perm, map_file);
+		if (ret) {
+			if (map_file->file)
+				fput(map_file->file);
+			kfree(map_file);
+			goto err;
+		}
+		size -= map_size;
+		uaddr += map_size;
+		map_iova += map_size;
+	}
+	mmap_read_unlock(dev->mm);
+
+	return 0;
+err:
+	vhost_vdpa_unmap(v, iova, map_iova - iova);
+	return ret;
+}
+
  static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
  					   struct vhost_iotlb_msg *msg)
  {
  	struct vhost_dev *dev = &v->vdev;
+	struct vdpa_device *vdpa = v->vdpa;
  	struct vhost_iotlb *iotlb = dev->iotlb;
  	struct page **page_list;
  	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
@@ -601,6 +663,10 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
  				    msg->iova + msg->size - 1))
  		return -EEXIST;

+	if (vdpa->use_va)
+		return vhost_vdpa_va_map(v, msg->iova, msg->size,
+					 msg->uaddr, msg->perm);
+
  	/* Limit the use of memory for bookkeeping */
  	page_list = (struct page **) __get_free_page(GFP_KERNEL);
  	if (!page_list)
@@ -654,7 +720,7 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
  				csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
  				ret = vhost_vdpa_map(v, iova, csize,
  						     map_pfn << PAGE_SHIFT,
-						     msg->perm);
+						     msg->perm, NULL);
  				if (ret) {
  					/*
  					 * Unpin the pages that are left unmapped
@@ -683,7 +749,7 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,

  	/* Pin the rest chunk */
  	ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT,
-			     map_pfn << PAGE_SHIFT, msg->perm);
+			     map_pfn << PAGE_SHIFT, msg->perm, NULL);
  out:
  	if (ret) {
  		if (nchunks) {
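
(On the parent side, the vaddr then arrives in the existing "pa"
argument and the file/offset pair rides along in "opaque". A sketch of
a hypothetical consumer, assuming a made-up my_hw_map() helper:

	static int my_dma_map(struct vdpa_device *vdpa, u64 iova, u64 size,
			      u64 pa, u32 perm, void *opaque)
	{
		/* with use_va set, "pa" is the userspace virtual address */
		struct vdpa_map_file *map_file = opaque;

		return my_hw_map(vdpa, iova, size, pa, perm, map_file);
	}
)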
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index 93dca2c328ae..bfae6d780c38 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -44,6 +44,7 @@ struct vdpa_mgmt_dev;
   * @config: the configuration ops for this device.
   * @index: device index
   * @features_valid: were features initialized? for legacy guests
+ * @use_va: indicate whether virtual address can be used by this device
   * @nvqs: maximum number of supported virtqueues
   * @mdev: management device pointer; caller must setup when registering device as part
   *	  of dev_add() mgmtdev ops callback before invoking _vdpa_register_device().
@@ -54,6 +55,7 @@ struct vdpa_device {
  	const struct vdpa_config_ops *config;
  	unsigned int index;
  	bool features_valid;
+	bool use_va;
  	int nvqs;
  	struct vdpa_mgmt_dev *mdev;
  };
@@ -69,6 +71,16 @@ struct vdpa_iova_range {
  };

  /**
+ * Corresponding file area for device memory mapping
+ * @file: vma->vm_file for the mapping
+ * @offset: mapping offset in the vm_file
+ */
+struct vdpa_map_file {
+	struct file *file;
+	u64 offset;
+};
+
+/**
   * vDPA_config_ops - operations for configuring a vDPA device.
   * Note: vDPA device drivers are required to implement all of the
   * operations unless it is mentioned to be optional in the following
@@ -250,14 +262,16 @@ struct vdpa_config_ops {

  struct vdpa_device *__vdpa_alloc_device(struct device *parent,
  					const struct vdpa_config_ops *config,
-					int nvqs, size_t size, const char *name);
+					int nvqs, size_t size,
+					const char *name, bool use_va);

-#define vdpa_alloc_device(dev_struct, member, parent, config, nvqs, name) \
+#define vdpa_alloc_device(dev_struct, member, parent, config, \
+			  nvqs, name, use_va) \
  			  container_of(__vdpa_alloc_device( \
  				       parent, config, nvqs, \
  				       sizeof(dev_struct) + \
  				       BUILD_BUG_ON_ZERO(offsetof( \
-				       dev_struct, member)), name), \
+				       dev_struct, member)), name, use_va), \
  				       dev_struct, member)

  int vdpa_register_device(struct vdpa_device *vdev);
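
FWIW, userspace is unchanged either way: the VMM keeps writing
VHOST_IOTLB_UPDATE messages with msg->uaddr to the vhost-vdpa fd, and
use_va only changes what the bus driver does with that address.
Roughly (msg_v2 layout, error handling elided):

	#include <linux/vhost.h>
	#include <stdint.h>
	#include <unistd.h>

	/* map one buffer at a given iova through the device IOTLB */
	static int iotlb_update(int fd, uint64_t iova, void *buf, uint64_t len)
	{
		struct vhost_msg_v2 msg = {
			.type = VHOST_IOTLB_MSG_V2,
			.iotlb = {
				.iova  = iova,
				.size  = len,
				.uaddr = (uintptr_t)buf,
				.perm  = VHOST_ACCESS_RW,
				.type  = VHOST_IOTLB_UPDATE,
			},
		};

		return write(fd, &msg, sizeof(msg)) == (ssize_t)sizeof(msg) ? 0 : -1;
	}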



