> From: Liu, Yi L <yi.l.liu@xxxxxxxxx> > Sent: Friday, March 3, 2023 2:58 PM > > > From: Jason Gunthorpe <jgg@xxxxxxxxxx> > > Sent: Thursday, March 2, 2023 1:47 AM > > > > On Wed, Mar 01, 2023 at 09:19:07AM +0000, Liu, Yi L wrote: > > > > From: Liu, Yi L <yi.l.liu@xxxxxxxxx> > > > > Sent: Monday, February 27, 2023 7:12 PM > > > [...] > > > > +long vfio_device_ioctl_bind_iommufd(struct vfio_device_file *df, > > > > + unsigned long arg) > > > > +{ > > > > + struct vfio_device *device = df->device; > > > > + struct vfio_device_bind_iommufd bind; > > > > + struct iommufd_ctx *iommufd = NULL; > > > > + unsigned long minsz; > > > > + int ret; > > > > + > > > > + minsz = offsetofend(struct vfio_device_bind_iommufd, out_devid); > > > > + > > > > + if (copy_from_user(&bind, (void __user *)arg, minsz)) > > > > + return -EFAULT; > > > > + > > > > + if (bind.argsz < minsz || bind.flags) > > > > + return -EINVAL; > > > > + > > > > + if (!device->ops->bind_iommufd) > > > > + return -ENODEV; > > > > > > Hi Jason, > > > > > > Per the comment in vfio_iommufd_bind(), such device driver > > > won't provide .bind_iommufd(). So shall we allow this ioctl > > > to go longer to call .open_device() instead of failing it here? > > > I think we need to allow it to go further. E.g. leave the check > > > to be in vfio_iommufd_bind(). Otherwise, user may not able > > > to use such devices. Is it? > > > > You are thinking about the crazy mdev samples? > > > > We should probably just change them to provide a 'no dma' set of ops. > > > > > > +struct vfio_device_bind_iommufd { > > > > + __u32 argsz; > > > > + __u32 flags; > > > > + __aligned_u64 dev_cookie; > > > > + __s32 iommufd; > > > > + __u32 out_devid; > > > > > > As above, for the devices that do not do DMA, there is > no .bind_iommufd > > > op, hence no iommufd_device generated. This means no good value > > > can be filled in this out_devid field. So this field is optional. Only > > > for the devices which do DMA, should this out_devid field return a > > > valid ID otherwise an invalid ID would be filled (e.g. value #0 is an > > > invalid value in the iommufd object id pool). Userspace needs to > > > check if the out_devid is valid or not before use. This ID can be further > > > used in iommufd uAPIs like IOMMU_HWPT_ALLOC, > > IOMMU_DEVICE_GET_INFO > > > and etc. > > > > I would say create an access and harmonize the no-DMA devices with the > > emulated devices. > > How about below change? > > diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c > index 4f82a6fa7c6c..e536515086d7 100644 > --- a/drivers/vfio/iommufd.c > +++ b/drivers/vfio/iommufd.c > @@ -18,12 +18,8 @@ int vfio_iommufd_bind(struct vfio_device *vdev, > struct iommufd_ctx *ictx) > > lockdep_assert_held(&vdev->dev_set->lock); > > - /* > - * If the driver doesn't provide this op then it means the device does > - * not do DMA at all. So nothing to do. > - */ > - if (!vdev->ops->bind_iommufd) > - return 0; > + if (WARN_ON(!vdev->ops->bind_iommufd)) > + return -ENODEV; > > ret = vdev->ops->bind_iommufd(vdev, ictx, &device_id); > if (ret) > @@ -102,7 +98,9 @@ > EXPORT_SYMBOL_GPL(vfio_iommufd_physical_attach_ioas); > /* > * The emulated standard ops mean that vfio_device is going to use the > * "mdev path" and will call vfio_pin_pages()/vfio_dma_rw(). Drivers using > this > - * ops set should call vfio_register_emulated_iommu_dev(). > + * ops set should call vfio_register_emulated_iommu_dev(). Drivers that > do > + * not call vfio_pin_pages()/vfio_dma_rw() has no need to provide > dma_unmap > + * callback. > */ > > static void vfio_emulated_unmap(void *data, unsigned long iova, > @@ -110,7 +107,8 @@ static void vfio_emulated_unmap(void *data, > unsigned long iova, > { > struct vfio_device *vdev = data; > > - vdev->ops->dma_unmap(vdev, iova, length); > + if (vdev->ops->dma_unmap) > + vdev->ops->dma_unmap(vdev, iova, length); > } > > static const struct iommufd_access_ops vfio_user_ops = { > diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c > index e54eb752e1ba..19391dda5fba 100644 > --- a/samples/vfio-mdev/mbochs.c > +++ b/samples/vfio-mdev/mbochs.c > @@ -1374,6 +1374,9 @@ static const struct vfio_device_ops > mbochs_dev_ops = { > .write = mbochs_write, > .ioctl = mbochs_ioctl, > .mmap = mbochs_mmap, > + .bind_iommufd = vfio_iommufd_emulated_bind, > + .unbind_iommufd = vfio_iommufd_emulated_unbind, > + .attach_ioas = vfio_iommufd_emulated_attach_ioas, > }; > > static struct mdev_driver mbochs_driver = { > diff --git a/samples/vfio-mdev/mdpy.c b/samples/vfio-mdev/mdpy.c > index e8400fdab71d..5f48aef36995 100644 > --- a/samples/vfio-mdev/mdpy.c > +++ b/samples/vfio-mdev/mdpy.c > @@ -663,6 +663,9 @@ static const struct vfio_device_ops mdpy_dev_ops = > { > .write = mdpy_write, > .ioctl = mdpy_ioctl, > .mmap = mdpy_mmap, > + .bind_iommufd = vfio_iommufd_emulated_bind, > + .unbind_iommufd = vfio_iommufd_emulated_unbind, > + .attach_ioas = vfio_iommufd_emulated_attach_ioas, > }; > > static struct mdev_driver mdpy_driver = { > diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c > index e887de672c52..35460901b9f7 100644 > --- a/samples/vfio-mdev/mtty.c > +++ b/samples/vfio-mdev/mtty.c > @@ -1269,6 +1269,9 @@ static const struct vfio_device_ops mtty_dev_ops > = { > .read = mtty_read, > .write = mtty_write, > .ioctl = mtty_ioctl, > + .bind_iommufd = vfio_iommufd_emulated_bind, > + .unbind_iommufd = vfio_iommufd_emulated_unbind, > + .attach_ioas = vfio_iommufd_emulated_attach_ioas, > }; > > static struct mdev_driver mtty_driver = { > > > What should we return here anyhow if an access was created? > > iommufd_access->obj.id. should be fine. Is it? btw. It requires creating iommufd_access in vfio_iommufd_emulated_bind() instead of in the attach(). Seems like Nicolin's replace domain series has a patch to move iommufd_access creation to the bind(). Regards, Yi Liu