> From: Jason Gunthorpe <jgg@xxxxxxxxxx> > Sent: Monday, May 15, 2023 10:00 PM > > Replace allows all the devices in a group to move in one step to a new > HWPT. Further, the HWPT move is done without going through a blocking > domain so that the IOMMU driver can implement some level of > non-distruption to ongoing DMA if that has meaning for it (eg for future > special driver domains) > > Replace uses a lot of the same logic as normal attach, except the actual > domain change over has different restrictions, and we are careful to > sequence things so that failure is going to leave everything the way it > was, and not get trapped in a blocking domain or something if there is > ENOMEM. > > Reviewed-by: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx> > Reviewed-by: Kevin Tian <kevin.tian@xxxxxxxxx> > Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx> > --- > drivers/iommu/iommufd/device.c | 99 ++++++++++++++++++++++++++++++++++ > drivers/iommu/iommufd/main.c | 1 + > 2 files changed, 100 insertions(+) > > diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c > index b7868c877d1c1c..ce758fbe3c525d 100644 > --- a/drivers/iommu/iommufd/device.c > +++ b/drivers/iommu/iommufd/device.c > @@ -4,6 +4,7 @@ > #include <linux/iommufd.h> > #include <linux/slab.h> > #include <linux/iommu.h> > +#include "../iommu-priv.h" > > #include "io_pagetable.h" > #include "iommufd_private.h" > @@ -365,6 +366,84 @@ iommufd_device_do_attach(struct iommufd_device *idev, > return NULL; > } > > +static struct iommufd_hw_pagetable * > +iommufd_device_do_replace(struct iommufd_device *idev, > + struct iommufd_hw_pagetable *hwpt) > +{ > + struct iommufd_group *igroup = idev->igroup; > + struct iommufd_hw_pagetable *old_hwpt; > + unsigned int num_devices = 0; > + struct iommufd_device *cur; > + int rc; > + > + mutex_lock(&idev->igroup->lock); > + > + if (igroup->hwpt == NULL) { > + rc = -EINVAL; > + goto err_unlock; > + } > + > + if (hwpt == igroup->hwpt) { > + mutex_unlock(&idev->igroup->lock); > + return NULL; > + } > + > + /* Try to upgrade the domain we have */ > + list_for_each_entry(cur, &igroup->device_list, group_item) { > + num_devices++; > + if (cur->enforce_cache_coherency) { > + rc = iommufd_hw_pagetable_enforce_cc(hwpt); > + if (rc) > + goto err_unlock; > + } > + } > + > + old_hwpt = igroup->hwpt; > + if (hwpt->ioas != old_hwpt->ioas) { > + list_for_each_entry(cur, &igroup->device_list, group_item) { > + rc = iopt_table_enforce_dev_resv_regions( > + &hwpt->ioas->iopt, cur->dev, NULL); > + if (rc) > + goto err_unresv; > + } > + } > + > + rc = iommufd_group_setup_msi(idev->igroup, hwpt); > + if (rc) > + goto err_unresv; > + > + rc = iommu_group_replace_domain(igroup->group, hwpt->domain); > + if (rc) > + goto err_unresv; > + > + if (hwpt->ioas != old_hwpt->ioas) { > + list_for_each_entry(cur, &igroup->device_list, group_item) > + iopt_remove_reserved_iova(&old_hwpt->ioas->iopt, > + cur->dev); > + } > + > + igroup->hwpt = hwpt; > + > + /* > + * Move the refcounts held by the device_list to the new hwpt. Retain a > + * refcount for this thread as the caller will free it. > + */ > + refcount_add(num_devices, &hwpt->obj.users); > + if (num_devices > 1) > + WARN_ON(refcount_sub_and_test(num_devices - 1, > + &old_hwpt->obj.users)); > + mutex_unlock(&idev->igroup->lock); > + > + /* Caller must destroy old_hwpt */ > + return old_hwpt; > +err_unresv: > + list_for_each_entry(cur, &igroup->device_list, group_item) > + iopt_remove_reserved_iova(&hwpt->ioas->iopt, cur->dev); > +err_unlock: > + mutex_unlock(&idev->igroup->lock); > + return ERR_PTR(rc); > +} > + > typedef struct iommufd_hw_pagetable *(*attach_fn)( > struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt); > > @@ -523,6 +602,26 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 > *pt_id) > } > EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD); > > +/** > + * iommufd_device_replace - Change the device's iommu_domain > + * @idev: device to change > + * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE > + * Output the IOMMUFD_OBJ_HW_PAGETABLE ID > + * > + * This is the same as > + * iommufd_device_detach(); > + * iommufd_device_attach(); One blank line here would fix a warning as below in "make htmldocs". Documentation/userspace-api/iommufd:184: ./drivers/iommu/iommufd/device.c:665: WARNING: Definition list ends without a blank line; unexpected unindent. Regards, Yi Liu > + * If it fails then no change is made to the attachment. The iommu driver may > + * implement this so there is no disruption in translation. This can only be > + * called if iommufd_device_attach() has already succeeded. > + */ > +int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id) > +{ > + return iommufd_device_change_pt(idev, pt_id, > + &iommufd_device_do_replace); > +} > +EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, IOMMUFD); > + > /** > * iommufd_device_detach - Disconnect a device to an iommu_domain > * @idev: device to detach > diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c > index 24f30f384df6f9..5b7f70543adb24 100644 > --- a/drivers/iommu/iommufd/main.c > +++ b/drivers/iommu/iommufd/main.c > @@ -466,5 +466,6 @@ module_exit(iommufd_exit); > MODULE_ALIAS_MISCDEV(VFIO_MINOR); > MODULE_ALIAS("devname:vfio/vfio"); > #endif > +MODULE_IMPORT_NS(IOMMUFD_INTERNAL); > MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices"); > MODULE_LICENSE("GPL"); > -- > 2.40.1