On Mon, Feb 27, 2023 at 03:11:32AM -0800, Yi Liu wrote:
> This adds ioctl for userspace to bind device cdev fd to iommufd.
>
> VFIO_DEVICE_BIND_IOMMUFD: bind device to an iommufd, hence gain DMA
> control provided by the iommufd. open_device
> op is called after bind_iommufd op.
> VFIO no iommu mode is indicated by passing
> a negative iommufd value.
>
> Signed-off-by: Yi Liu <yi.l.liu@xxxxxxxxx>
> ---
>  drivers/vfio/device_cdev.c | 146 +++++++++++++++++++++++++++++++++++++
>  drivers/vfio/vfio.h        |  17 ++++-
>  drivers/vfio/vfio_main.c   |  54 ++++++++++++--
>  include/linux/iommufd.h    |   6 ++
>  include/uapi/linux/vfio.h  |  34 +++++++++
>  5 files changed, 248 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/vfio/device_cdev.c b/drivers/vfio/device_cdev.c
> index 9e2c1ecaaf4f..37f80e368551 100644
> --- a/drivers/vfio/device_cdev.c
> +++ b/drivers/vfio/device_cdev.c
> @@ -3,6 +3,7 @@
>   * Copyright (c) 2023 Intel Corporation.
>   */
>  #include <linux/vfio.h>
> +#include <linux/iommufd.h>
>
>  #include "vfio.h"
>
> @@ -45,6 +46,151 @@ int vfio_device_fops_cdev_open(struct inode *inode, struct file *filep)
>  	return ret;
>  }
>
> +static void vfio_device_get_kvm_safe(struct vfio_device_file *df)
> +{
> +	spin_lock(&df->kvm_ref_lock);
> +	if (!df->kvm)
> +		goto unlock;
> +
> +	_vfio_device_get_kvm_safe(df->device, df->kvm);
> +
> +unlock:

Just

	if (df->kvm)
		_vfio_device_get_kvm_safe(df->device, df->kvm);

Without the goto

> +	spin_unlock(&df->kvm_ref_lock);
> +}
> +
> +void vfio_device_cdev_close(struct vfio_device_file *df)
> +{
> +	struct vfio_device *device = df->device;
> +
> +	mutex_lock(&device->dev_set->lock);
> +	/*
> +	 * As df->access_granted writer is under dev_set->lock as well,
> +	 * so this read no need to use smp_load_acquire() to pair with
> +	 * smp_store_release() in the caller of vfio_device_open().
> +	 */

This is a bit misleading, we are about to free df in the caller, so at
this moment df has no concurrent access. We don't even need to have the
mutex to test it.

> +long vfio_device_ioctl_bind_iommufd(struct vfio_device_file *df,
> +				    unsigned long arg)

struct vfio_device_bind_iommufd __user *arg and remove all the casts.

> +{
> +	struct vfio_device *device = df->device;
> +	struct vfio_device_bind_iommufd bind;
> +	struct iommufd_ctx *iommufd = NULL;
> +	unsigned long minsz;
> +	int ret;
> +
> +	minsz = offsetofend(struct vfio_device_bind_iommufd, out_devid);
> +
> +	if (copy_from_user(&bind, (void __user *)arg, minsz))
> +		return -EFAULT;
> +
> +	if (bind.argsz < minsz || bind.flags)
> +		return -EINVAL;
> +
> +	if (!device->ops->bind_iommufd)
> +		return -ENODEV;
> +
> +	ret = vfio_device_block_group(device);
> +	if (ret)
> +		return ret;
> +
> +	mutex_lock(&device->dev_set->lock);
> +	/*
> +	 * If already been bound to an iommufd, or already set noiommu
> +	 * then fail it.
> +	 */
> +	if (df->iommufd || df->noiommu) {
> +		ret = -EINVAL;
> +		goto out_unlock;
> +	}
> +
> +	/* iommufd < 0 means noiommu mode */
> +	if (bind.iommufd < 0) {
> +		if (!capable(CAP_SYS_RAWIO)) {
> +			ret = -EPERM;
> +			goto out_unlock;
> +		}
> +		df->noiommu = true;
> +	} else {
> +		iommufd = vfio_get_iommufd_from_fd(bind.iommufd);
> +		if (IS_ERR(iommufd)) {
> +			ret = PTR_ERR(iommufd);
> +			goto out_unlock;
> +		}
> +	}
> +
> +	/*
> +	 * Before the device open, get the KVM pointer currently
> +	 * associated with the device file (if there is) and obtain
> +	 * a reference. This reference is held until device closed.
> +	 * Save the pointer in the device for use by drivers.
> +	 */
> +	vfio_device_get_kvm_safe(df);
> +
> +	df->iommufd = iommufd;
> +	ret = vfio_device_open(df, &bind.out_devid, NULL);
> +	if (ret)
> +		goto out_put_kvm;
> +
> +	ret = copy_to_user((void __user *)arg +
> +			   offsetofend(struct vfio_device_bind_iommufd, iommufd),

??

	&arg->out_devid

	static_assert(__same_type...)

> diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
> index 650d45629647..9672cf839687 100644
> --- a/include/linux/iommufd.h
> +++ b/include/linux/iommufd.h
> @@ -17,6 +17,12 @@ struct iommufd_ctx;
>  struct iommufd_access;
>  struct file;
>
> +/*
> + * iommufd core init xarray with flags==XA_FLAGS_ALLOC1, so valid
> + * ID starts from 1.
> + */
> +#define IOMMUFD_INVALID_ID 0

Why? vfio doesn't need to check this just to generate EINVAL.

Jason