On Thu, Nov 10, 2022 at 06:57:57AM +0000, Tian, Kevin wrote: > > + /* > > + * Emulation for NOIMMU is imperfect in that VFIO blocks almost all > > + * other ioctls. We let them keep working but they mostly fail since no > > + * IOAS should exist. > > + */ > > + if (IS_ENABLED(CONFIG_VFIO_NOIOMMU) && type == > > VFIO_NOIOMMU_IOMMU) > > + return 0; > > + > > if (type != VFIO_TYPE1_IOMMU && type != VFIO_TYPE1v2_IOMMU) > > return -EINVAL; > > also need a check in iommufd_vfio_check_extension() so only > VFIO_NOIOMMU_IOMMU is supported in no-iommu mode. Mm, and some permission checks too > > + if (!IS_ENABLED(CONFIG_VFIO_NO_IOMMU) || > > + group->type != VFIO_NO_IOMMU) { > > + ret = iommufd_vfio_compat_ioas_id(iommufd, > > &ioas_id); > > + if (ret) { > > + iommufd_ctx_put(group->iommufd); > > + goto out_unlock; > > + } > > } > > with above I suppose other ioctls (map/unmap/etc.) are implicitly blocked > given get_compat_ioas() will fail in those paths. this is good. > > btw vfio container requires exact match between group->type and > container->noiommu, i.e. noiommu group can be only attached to noiommu > container. this is another thing to be paired up. Sure, as below So, the missing ingredient here is somone who has the necessary device to test dpdk? I wonder if qemu e1000 is able to do this path? diff --git a/drivers/iommu/iommufd/vfio_compat.c b/drivers/iommu/iommufd/vfio_compat.c index dbef3274803336..c20e55ddc9aa81 100644 --- a/drivers/iommu/iommufd/vfio_compat.c +++ b/drivers/iommu/iommufd/vfio_compat.c @@ -26,16 +26,35 @@ static struct iommufd_ioas *get_compat_ioas(struct iommufd_ctx *ictx) } /** - * iommufd_vfio_compat_ioas_id - Return the IOAS ID that vfio should use + * iommufd_vfio_compat_ioas_get_id - Ensure a comat IOAS exists + * @ictx: Context to operate on + * + * Return the ID of the current compatability ioas. The ID can be passed into + * other functions that take an ioas_id. + */ +int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id) +{ + struct iommufd_ioas *ioas; + + ioas = get_compat_ioas(ictx); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + *out_ioas_id = ioas->obj.id; + iommufd_put_object(&ioas->obj); + return 0; +} +EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_get_id, IOMMUFD_VFIO); + +/** + * iommufd_vfio_compat_ioas_create_id - Return the IOAS ID that vfio should use * @ictx: Context to operate on - * @out_ioas_id: The ioas_id the caller should use * * The compatibility IOAS is the IOAS that the vfio compatibility ioctls operate * on since they do not have an IOAS ID input in their ABI. Only attaching a - * group should cause a default creation of the internal ioas, this returns the - * existing ioas if it has already been assigned somehow. + * group should cause a default creation of the internal ioas, this does nothing + * if an existing ioas has already been assigned somehow. */ -int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id) +int iommufd_vfio_compat_ioas_create_id(struct iommufd_ctx *ictx) { struct iommufd_ioas *ioas = NULL; struct iommufd_ioas *out_ioas; @@ -53,7 +72,6 @@ int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id) } xa_unlock(&ictx->objects); - *out_ioas_id = out_ioas->obj.id; if (out_ioas != ioas) { iommufd_put_object(&out_ioas->obj); iommufd_object_abort(ictx, &ioas->obj); @@ -68,7 +86,7 @@ int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id) iommufd_object_finalize(ictx, &ioas->obj); return 0; } -EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_id, IOMMUFD_VFIO); +EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_create_id, IOMMUFD_VFIO); int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd) { @@ -230,6 +248,9 @@ static int iommufd_vfio_check_extension(struct iommufd_ctx *ictx, case VFIO_UNMAP_ALL: return 1; + case VFIO_NOIOMMU_IOMMU: + return IS_ENABLED(CONFIG_VFIO_NOIOMMU); + case VFIO_DMA_CC_IOMMU: return iommufd_vfio_cc_iommu(ictx); @@ -259,6 +280,17 @@ static int iommufd_vfio_set_iommu(struct iommufd_ctx *ictx, unsigned long type) struct iommufd_ioas *ioas = NULL; int rc = 0; + /* + * Emulation for NOIMMU is imperfect in that VFIO blocks almost all + * other ioctls. We let them keep working but they mostly fail since no + * IOAS should exist. + */ + if (IS_ENABLED(CONFIG_VFIO_NOIOMMU) && type == VFIO_NOIOMMU_IOMMU) { + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + return 0; + } + if (type != VFIO_TYPE1_IOMMU && type != VFIO_TYPE1v2_IOMMU) return -EINVAL; diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c index 595c7b2146f88c..daa8039da7a8fa 100644 --- a/drivers/vfio/iommufd.c +++ b/drivers/vfio/iommufd.c @@ -18,6 +18,21 @@ int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx) lockdep_assert_held(&vdev->dev_set->lock); + if (IS_ENABLED(CONFIG_VFIO_NO_IOMMU) && + vdev->group->type == VFIO_NO_IOMMU) { + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + + /* + * Require no compat ioas to be assigned to proceed. The basic + * statement is that the user cannot have done something that + * implies they expected translation to exist + */ + if (!iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id)) + return -EPERM; + return 0; + } + /* * If the driver doesn't provide this op then it means the device does * not do DMA at all. So nothing to do. @@ -29,7 +44,7 @@ int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx) if (ret) return ret; - ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id); + ret = iommufd_vfio_compat_ioas_get_id(ictx, &ioas_id); if (ret) goto err_unbind; ret = vdev->ops->attach_ioas(vdev, &ioas_id); @@ -53,6 +68,10 @@ void vfio_iommufd_unbind(struct vfio_device *vdev) { lockdep_assert_held(&vdev->dev_set->lock); + if (IS_ENABLED(CONFIG_VFIO_NO_IOMMU) && + vdev->group->type == VFIO_NO_IOMMU) + return; + if (vdev->ops->unbind_iommufd) vdev->ops->unbind_iommufd(vdev); } diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index f3c48b8c45627d..b59eff30968a1e 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -747,12 +747,13 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group, iommufd = iommufd_ctx_from_file(f.file); if (!IS_ERR(iommufd)) { - u32 ioas_id; - - ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id); - if (ret) { - iommufd_ctx_put(group->iommufd); - goto out_unlock; + if (!IS_ENABLED(CONFIG_VFIO_NO_IOMMU) || + group->type != VFIO_NO_IOMMU) { + ret = iommufd_vfio_compat_ioas_create_id(iommufd); + if (ret) { + iommufd_ctx_put(group->iommufd); + goto out_unlock; + } } group->iommufd = iommufd; diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h index 7a5d64a1dae482..bf2b3ea5f90fd2 100644 --- a/include/linux/iommufd.h +++ b/include/linux/iommufd.h @@ -61,7 +61,8 @@ void iommufd_access_unpin_pages(struct iommufd_access *access, unsigned long iova, unsigned long length); int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, void *data, size_t len, unsigned int flags); -int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id); +int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id); +int iommufd_vfio_compat_ioas_create_id(struct iommufd_ctx *ictx); #else /* !CONFIG_IOMMUFD */ static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file) { @@ -93,8 +94,7 @@ static inline int iommufd_access_rw(struct iommufd_access *access, unsigned long return -EOPNOTSUPP; } -static inline int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, - u32 *out_ioas_id) +static inline int iommufd_vfio_compat_ioas_create_id(struct iommufd_ctx *ictx) { return -EOPNOTSUPP; }