Re: [PATCH RFC v3 2/4] vdpa/mlx5: implement .reset_map driver op

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 9/10/2023 8:48 PM, Jason Wang wrote:
On Sat, Sep 9, 2023 at 9:46 PM Si-Wei Liu <si-wei.liu@xxxxxxxxxx> wrote:
Today, mlx5_vdpa gets started by preallocate 1:1 DMA mapping at
device creation time, while this 1:1 mapping will be implicitly
destroyed when the first .set_map call is invoked. Everytime
when the .reset callback is invoked, any mapping left behind will
be dropped then reset back to the initial 1:1 DMA mapping.

In order to reduce excessive memory mapping cost during live
migration, it is desirable to decouple the vhost-vdpa iotlb
abstraction from the virtio device life cycle, i.e. mappings
should be left intact across virtio device reset. Leverage the
.reset_map callback to reset memory mapping, then the device
.reset routine can run free from having to clean up memory
mappings.
It's not clear the direct relationship between the persistent mapping
and reset_map.
Consider .reset_map as a simplified abstraction for on-chip IOMMU model, decoupling memory mapping mode switching from the current vdpa_reset hack. Slightly different than platform IOMMU iommu_domain_alloc/free, but works the best with existing .dma_map/.set_map APIs. As said in the other email, the distinction cannot be hidden, as there are bus drivers with varied mapping needs. On the other hand, I can live with the iommu_domain_alloc/free flavor strictly following the platform IOMMU model, but not sure if worth the complexity.

Could we do it step by step? For example, remove the
mlx5_vdpa_destroy_mr() in mlx5_vdpa_reset() when PERSIST_IOTLB exists?
I think today there's no way for the parent driver to negotiate backend features with userspace, for e.g. parent won't be able to perform mlx5_vdpa_destroy_mr for the virtio-vdpa case when PERSIST_IOTLB doesn't exist. And this backend features stuff is a vhost specific thing, not specifically tied to vdpa itself. How do we get it extended and propagated up to the vdpa bus layer?

And then we can deal with the resetting and others on top,
For this proposed fix, dealing with vdpa_reset from vhost-vdpa is not specifically an issue, but how to get the mapping reverted back to 1:1 identity/passthrough when users want to switch from vhost-vdpa to virtio-vdpa is.

  or it needs
some explanation for why reset_map() must be done first.
Yep, I can add more to the commit log.

Thanks,
-Siwei


Thanks

Signed-off-by: Si-Wei Liu <si-wei.liu@xxxxxxxxxx>

---
RFC v1 -> v2:
   - fix error path when both CVQ and DVQ fall in same asid
---
  drivers/vdpa/mlx5/core/mlx5_vdpa.h |  1 +
  drivers/vdpa/mlx5/core/mr.c        | 70 +++++++++++++++++++++++---------------
  drivers/vdpa/mlx5/net/mlx5_vnet.c  | 18 +++++++---
  3 files changed, 56 insertions(+), 33 deletions(-)

diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
index b53420e..5c9a25a 100644
--- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
@@ -123,6 +123,7 @@ int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
                         unsigned int asid);
  void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev);
  void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid);
+int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid);

  #define mlx5_vdpa_warn(__dev, format, ...)                                                         \
         dev_warn((__dev)->mdev->device, "%s:%d:(pid %d) warning: " format, __func__, __LINE__,     \
diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
index 5a1971fc..ec2c7b4e1 100644
--- a/drivers/vdpa/mlx5/core/mr.c
+++ b/drivers/vdpa/mlx5/core/mr.c
@@ -489,21 +489,15 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr
         }
  }

-static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
+static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev)
  {
-       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
-               return;
-
         prune_iotlb(mvdev);
  }

-static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
+static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev)
  {
         struct mlx5_vdpa_mr *mr = &mvdev->mr;

-       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
-               return;
-
         if (!mr->initialized)
                 return;

@@ -521,8 +515,10 @@ void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid)

         mutex_lock(&mr->mkey_mtx);

-       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
-       _mlx5_vdpa_destroy_cvq_mr(mvdev, asid);
+       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid)
+               _mlx5_vdpa_destroy_dvq_mr(mvdev);
+       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid)
+               _mlx5_vdpa_destroy_cvq_mr(mvdev);

         mutex_unlock(&mr->mkey_mtx);
  }
@@ -534,25 +530,17 @@ void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
  }

  static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev,
-                                   struct vhost_iotlb *iotlb,
-                                   unsigned int asid)
+                                   struct vhost_iotlb *iotlb)
  {
-       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
-               return 0;
-
         return dup_iotlb(mvdev, iotlb);
  }

  static int _mlx5_vdpa_create_dvq_mr(struct mlx5_vdpa_dev *mvdev,
-                                   struct vhost_iotlb *iotlb,
-                                   unsigned int asid)
+                                   struct vhost_iotlb *iotlb)
  {
         struct mlx5_vdpa_mr *mr = &mvdev->mr;
         int err;

-       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
-               return 0;
-
         if (mr->initialized)
                 return 0;

@@ -574,18 +562,22 @@ static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
  {
         int err;

-       err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid);
-       if (err)
-               return err;
-
-       err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb, asid);
-       if (err)
-               goto out_err;
+       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
+               err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb);
+               if (err)
+                       return err;
+       }
+       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) {
+               err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb);
+               if (err)
+                       goto out_err;
+       }

         return 0;

  out_err:
-       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
+       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid)
+               _mlx5_vdpa_destroy_dvq_mr(mvdev);

         return err;
  }
@@ -601,6 +593,28 @@ int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
         return err;
  }

+int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
+{
+       struct mlx5_vdpa_mr *mr = &mvdev->mr;
+       int err = 0;
+
+       if (asid != 0)
+               return 0;
+
+       mutex_lock(&mr->mkey_mtx);
+       if (!mr->user_mr)
+               goto out;
+       _mlx5_vdpa_destroy_dvq_mr(mvdev);
+       if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
+               err = _mlx5_vdpa_create_dvq_mr(mvdev, NULL);
+               if (err)
+                       mlx5_vdpa_warn(mvdev, "create DMA MR failed\n");
+       }
+out:
+       mutex_unlock(&mr->mkey_mtx);
+       return err;
+}
+
  int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
                              bool *change_map, unsigned int asid)
  {
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 37be945..3cb5db6 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -2824,7 +2824,6 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
         unregister_link_notifier(ndev);
         teardown_driver(ndev);
         clear_vqs_ready(ndev);
-       mlx5_vdpa_destroy_mr(&ndev->mvdev);
         ndev->mvdev.status = 0;
         ndev->mvdev.suspended = false;
         ndev->cur_num_vqs = 0;
@@ -2835,10 +2834,6 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
         init_group_to_asid_map(mvdev);
         ++mvdev->generation;

-       if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
-               if (mlx5_vdpa_create_mr(mvdev, NULL, 0))
-                       mlx5_vdpa_warn(mvdev, "create MR failed\n");
-       }
         up_write(&ndev->reslock);

         return 0;
@@ -2903,6 +2898,18 @@ static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
         return err;
  }

+static int mlx5_vdpa_reset_map(struct vdpa_device *vdev, unsigned int asid)
+{
+       struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+       struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+       int err;
+
+       down_write(&ndev->reslock);
+       err = mlx5_vdpa_reset_mr(mvdev, asid);
+       up_write(&ndev->reslock);
+       return err;
+}
+
  static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
  {
         struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
@@ -3162,6 +3169,7 @@ static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
         .set_config = mlx5_vdpa_set_config,
         .get_generation = mlx5_vdpa_get_generation,
         .set_map = mlx5_vdpa_set_map,
+       .reset_map = mlx5_vdpa_reset_map,
         .set_group_asid = mlx5_set_group_asid,
         .get_vq_dma_dev = mlx5_get_vq_dma_dev,
         .free = mlx5_vdpa_free,
--
1.8.3.1


_______________________________________________
Virtualization mailing list
Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx
https://lists.linuxfoundation.org/mailman/listinfo/virtualization




[Index of Archives]     [KVM Development]     [Libvirt Development]     [Libvirt Users]     [CentOS Virtualization]     [Netdev]     [Ethernet Bridging]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux