Hi Nic, > From: Nicolin Chen <nicolinc@xxxxxxxxxx> > Sent: Wednesday, February 8, 2023 5:18 AM > > iommu_group_replace_domain() is introduced to support use cases where > an > iommu_group can be attached to a new domain without getting detached > from > the old one. This replacement feature will be useful, for cases such as: > 1) vPASID mode, when a guest wants to replace a single pasid (PASID=0) > table with a larger table (PASID=N) > 2) Nesting mode, when switching the attaching device from an S2 domain > to an S1 domain, or when switching between relevant S1 domains. > as it allows these cases to switch seamlessly without a DMA disruption. > > So, call iommu_group_replace_domain() in the > iommufd_device_do_attach(). > And add a __iommmufd_device_detach helper to allow the replace routine > to > do a partial detach on the current hwpt that's being replaced. Though the > updated locking logic is overcomplicated, it will be eased, once those > iopt_table_add/remove_ioas and list_add/del calls are moved to hwpt's > allocation/destroy() functions in the coming nesting series, as that'll > depend on a new ->domain_alloc_user op in the iommu core. > > Also, block replace operations that are from/to auto_domains, i.e. only > user-allocated hw_pagetables can be replaced or replaced with. 
> > Signed-off-by: Nicolin Chen <nicolinc@xxxxxxxxxx> > --- > drivers/iommu/iommufd/device.c | 101 +++++++++++++++++------- > drivers/iommu/iommufd/iommufd_private.h | 2 + > 2 files changed, 76 insertions(+), 27 deletions(-) > > diff --git a/drivers/iommu/iommufd/device.c > b/drivers/iommu/iommufd/device.c > index b8c3e3baccb5..8a9834fc129a 100644 > --- a/drivers/iommu/iommufd/device.c > +++ b/drivers/iommu/iommufd/device.c > @@ -9,6 +9,8 @@ > #include "io_pagetable.h" > #include "iommufd_private.h" > > +MODULE_IMPORT_NS(IOMMUFD_INTERNAL); > + > static bool allow_unsafe_interrupts; > module_param(allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR); > MODULE_PARM_DESC( > @@ -194,9 +196,61 @@ static bool > iommufd_hw_pagetable_has_group(struct iommufd_hw_pagetable *hwpt, > return false; > } > > +/** > + * __iommmufd_device_detach - Detach a device from idev->hwpt to > new_hwpt This function doesn't do anything to attach the device to new_hwpt; that is done in iommufd_device_attach_ioas(). Here new_hwpt only indicates whether this detach needs to do some extra work, e.g. removing the reserved IOVA from idev->hwpt->ioas. So maybe just say "Detach a device from idev->hwpt", and explain the usage of new_hwpt below. > + * @idev: device to detach > + * @new_hwpt: new hw_pagetable to attach (pass in NULL for a simple > detach) The new hw_pagetable to be attached. > + * @detach_group: flag to call iommu_detach_group > + * > + * This is a cleanup helper shared by the replace and detach routines. > Comparing > + * to a detach routine, a replace routine only needs a partial detach > procedure: > + * it does not need the iommu_detach_group(); it will attach the device to > a new > + * hw_pagetable after a partial detach from the currently attached > hw_pagetable, > + * so certain steps can be skipped if two hw_pagetables have the same > IOAS. 
> + */ > +static void __iommmufd_device_detach(struct iommufd_device *idev, > + struct iommufd_hw_pagetable > *new_hwpt, > + bool detach_group) > +{ > + struct iommufd_hw_pagetable *hwpt = idev->hwpt; > + struct iommufd_ioas *new_ioas = NULL; > + > + if (new_hwpt) > + new_ioas = new_hwpt->ioas; > + > + mutex_lock(&hwpt->devices_lock); > + list_del(&idev->devices_item); > + if (hwpt->ioas != new_ioas) > + mutex_lock(&hwpt->ioas->mutex); Elsewhere the lock order is mostly hwpt->ioas->mutex first and then hwpt->devices_lock; see iommufd_device_auto_get_domain(). If possible, please switch the locking order here to match. Also, please rename hwpt to cur_hwpt; this would help reviewers distinguish it from the hwpt in the caller of this function. The current lock order looks like a deadlock at first glance, though not after a closer reading. > + if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) { > + if (list_empty(&hwpt->devices)) { > + iopt_table_remove_domain(&hwpt->ioas->iopt, > + hwpt->domain); > + list_del(&hwpt->hwpt_item); > + } > + if (detach_group) > + iommu_detach_group(hwpt->domain, idev->group); > + } > + if (hwpt->ioas != new_ioas) { > + iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev- > >dev); > + mutex_unlock(&hwpt->ioas->mutex); > + } > + mutex_unlock(&hwpt->devices_lock); > + > + if (hwpt->auto_domain) > + iommufd_object_destroy_user(idev->ictx, &hwpt->obj); > + else > + refcount_dec(&hwpt->obj.users); > + > + idev->hwpt = NULL; > + > + refcount_dec(&idev->obj.users); > +} > + > static int iommufd_device_do_attach(struct iommufd_device *idev, > struct iommufd_hw_pagetable *hwpt) > { > + struct iommufd_hw_pagetable *cur_hwpt = idev->hwpt; > phys_addr_t sw_msi_start = PHYS_ADDR_MAX; > int rc; > > @@ -236,7 +290,7 @@ static int iommufd_device_do_attach(struct > iommufd_device *idev, > * the group once for the first device that is in the group. 
> */ > if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) { > - rc = iommu_attach_group(hwpt->domain, idev->group); > + rc = iommu_group_replace_domain(idev->group, hwpt- > >domain); > if (rc) > goto out_iova; > > @@ -249,6 +303,10 @@ static int iommufd_device_do_attach(struct > iommufd_device *idev, > } > } > > + /* Replace the cur_hwpt without iommu_detach_group() */ > + if (cur_hwpt) > + __iommmufd_device_detach(idev, hwpt, false); > + > idev->hwpt = hwpt; > refcount_inc(&hwpt->obj.users); > list_add(&idev->devices_item, &hwpt->devices); > @@ -256,7 +314,10 @@ static int iommufd_device_do_attach(struct > iommufd_device *idev, > return 0; > > out_detach: > - iommu_detach_group(hwpt->domain, idev->group); > + if (cur_hwpt) > + iommu_group_replace_domain(idev->group, cur_hwpt- > >domain); > + else > + iommu_detach_group(hwpt->domain, idev->group); > out_iova: > iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev); > out_unlock: > @@ -345,6 +406,13 @@ int iommufd_device_attach(struct iommufd_device > *idev, u32 *pt_id) > struct iommufd_hw_pagetable *hwpt = > container_of(pt_obj, struct > iommufd_hw_pagetable, obj); > > + if (idev->hwpt == hwpt) > + goto out_done; > + if (idev->hwpt && idev->hwpt->auto_domain) { > + rc = -EBUSY; Does this mean that if the device was attached to an auto-created hwpt, we cannot replace it with a user-allocated hwpt? If so, replace will not be available until user-allocated hwpt support lands, which is part of the nesting series. > + goto out_put_pt_obj; > + } > + > mutex_lock(&hwpt->ioas->mutex); > rc = iommufd_device_do_attach(idev, hwpt); > mutex_unlock(&hwpt->ioas->mutex); > @@ -356,6 +424,8 @@ int iommufd_device_attach(struct iommufd_device > *idev, u32 *pt_id) > struct iommufd_ioas *ioas = > container_of(pt_obj, struct iommufd_ioas, obj); > > + if (idev->hwpt) > + return -EBUSY; So we don't allow IOAS replacement for physical devices, is that right? It looks like emulated devices allow it. 
> rc = iommufd_device_auto_get_domain(idev, ioas); > if (rc) > goto out_put_pt_obj; > @@ -367,6 +437,7 @@ int iommufd_device_attach(struct iommufd_device > *idev, u32 *pt_id) > } > > refcount_inc(&idev->obj.users); > +out_done: > *pt_id = idev->hwpt->obj.id; > rc = 0; > > @@ -385,31 +456,7 @@ > EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD); > */ > void iommufd_device_detach(struct iommufd_device *idev) > { > - struct iommufd_hw_pagetable *hwpt = idev->hwpt; > - > - mutex_lock(&hwpt->ioas->mutex); > - mutex_lock(&hwpt->devices_lock); > - list_del(&idev->devices_item); > - if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) { > - if (list_empty(&hwpt->devices)) { > - iopt_table_remove_domain(&hwpt->ioas->iopt, > - hwpt->domain); > - list_del(&hwpt->hwpt_item); > - } > - iommu_detach_group(hwpt->domain, idev->group); > - } > - iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev); > - mutex_unlock(&hwpt->devices_lock); > - mutex_unlock(&hwpt->ioas->mutex); > - > - if (hwpt->auto_domain) > - iommufd_object_destroy_user(idev->ictx, &hwpt->obj); > - else > - refcount_dec(&hwpt->obj.users); > - > - idev->hwpt = NULL; > - > - refcount_dec(&idev->obj.users); > + __iommmufd_device_detach(idev, NULL, true); > } > EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, IOMMUFD); > > diff --git a/drivers/iommu/iommufd/iommufd_private.h > b/drivers/iommu/iommufd/iommufd_private.h > index 593138bb37b8..200c783800ad 100644 > --- a/drivers/iommu/iommufd/iommufd_private.h > +++ b/drivers/iommu/iommufd/iommufd_private.h > @@ -9,6 +9,8 @@ > #include <linux/refcount.h> > #include <linux/uaccess.h> > > +#include "../iommu-priv.h" > + > struct iommu_domain; > struct iommu_group; > struct iommu_option; > -- > 2.39.1 Regards, Yi Liu