From: Wei Lin Guay <wguay@xxxxxxxx> Summary: Support vfio exporting a dmabuf to importers, such as RDMA NICs, that do not support the move_notify callback, since not all RDMA drivers support on-demand paging (ODP). There are use cases, such as bind accelerators, that always pin the device memory via vfio and export it to an RDMA NIC such as EFA, BNXT_RE or IRDMA that does not support ODP. Signed-off-by: Wei Lin Guay <wguay@xxxxxxxx> Reviewed-by: Dag Moxnes <dagmoxnes@xxxxxxxx> Reviewed-by: Keith Busch <kbusch@xxxxxxxxxx> Reviewed-by: Nic Viljoen <nviljoen@xxxxxxxx> --- drivers/vfio/pci/dma_buf.c | 32 +++++++++++++++++++++++++++----- drivers/vfio/pci/vfio_pci_core.c | 16 ++++++++++++++++ drivers/vfio/pci/vfio_pci_priv.h | 7 +++++++ 3 files changed, 50 insertions(+), 5 deletions(-) diff --git a/drivers/vfio/pci/dma_buf.c b/drivers/vfio/pci/dma_buf.c index fd772b520cd7..8017f48296cb 100644 --- a/drivers/vfio/pci/dma_buf.c +++ b/drivers/vfio/pci/dma_buf.c @@ -17,6 +17,7 @@ struct vfio_pci_dma_buf { unsigned int orig_nents; size_t offset; bool revoked; + bool pinned; }; static int vfio_pci_dma_buf_attach(struct dma_buf *dmabuf, @@ -32,17 +33,38 @@ static int vfio_pci_dma_buf_attach(struct dma_buf *dmabuf, return 0; } +bool vfio_pci_dma_buf_pinned(struct vfio_pci_core_device *vdev) +{ + struct vfio_pci_dma_buf *priv; + struct vfio_pci_dma_buf *tmp; + bool pinned = false; + + down_write(&vdev->memory_lock); + list_for_each_entry_safe(priv, tmp, &vdev->dmabufs, dmabufs_elm) { + if (!dma_buf_try_get(priv->dmabuf)) + continue; + if (priv->pinned) { + pinned = true; + break; + } + } + up_write(&vdev->memory_lock); + return pinned; +} + static void vfio_pci_dma_buf_unpin(struct dma_buf_attachment *attachment) { + struct vfio_pci_dma_buf *priv = attachment->dmabuf->priv; + + priv->pinned = false; } static int vfio_pci_dma_buf_pin(struct dma_buf_attachment *attachment) { - /* - * Uses the dynamic interface but must always allow for - * dma_buf_move_notify() to do revoke - */ - return 
-EINVAL; + struct vfio_pci_dma_buf *priv = attachment->dmabuf->priv; + + priv->pinned = true; + return 0; } static struct sg_table * diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index bb97b4d94eb7..db28fa2cc9a8 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1246,6 +1246,13 @@ static int vfio_pci_ioctl_reset(struct vfio_pci_core_device *vdev, */ vfio_pci_set_power_state(vdev, PCI_D0); + /* + * prevent reset if dma_buf is pinned to avoid stale pinned + * expose to the dmabuf exporter. + */ + if (vfio_pci_dma_buf_pinned(vdev)) + return -EINVAL; + vfio_pci_dma_buf_move(vdev, true); ret = pci_try_reset_function(vdev->pdev); if (__vfio_pci_memory_enabled(vdev)) @@ -2444,6 +2451,15 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, break; } + /* + * prevent reset if dma_buf is pinned to avoid stale pinned + * expose to the dmabuf exporter. + */ + if (vfio_pci_dma_buf_pinned(vdev)) { + ret = -EINVAL; + break; + } + /* * Take the memory write lock for each device and zap BAR * mappings to prevent the user accessing the device while in diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h index 09d3c300918c..43c40dc4751c 100644 --- a/drivers/vfio/pci/vfio_pci_priv.h +++ b/drivers/vfio/pci/vfio_pci_priv.h @@ -107,6 +107,7 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags, size_t argsz); void vfio_pci_dma_buf_cleanup(struct vfio_pci_core_device *vdev); void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked); +bool vfio_pci_dma_buf_pinned(struct vfio_pci_core_device *vdev); #else static int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags, @@ -115,6 +116,12 @@ vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags, { return -ENOTTY; } + +static inline bool vfio_pci_dma_buf_pinned(struct vfio_pci_core_device *vdev) +{ + return false; +} + static inline void 
vfio_pci_dma_buf_cleanup(struct vfio_pci_core_device *vdev) { } -- 2.43.5