On Thu, Jul 14, 2022 at 06:18:22PM +1000, Alexey Kardashevskiy wrote: > +/* > + * A simple iommu_ops to allow less cruft in generic VFIO code. > + */ > +static bool spapr_tce_iommu_capable(enum iommu_cap cap) > +{ > + switch (cap) { > + case IOMMU_CAP_CACHE_COHERENCY: I would add a remark here that it is because vfio is going to use SPAPR mode but still checks that the iommu driver support coherency - with out that detail it looks very strange to have caps without implementing unmanaged domains > +static struct iommu_domain *spapr_tce_iommu_domain_alloc(unsigned int type) > +{ > + struct iommu_domain *dom; > + > + if (type != IOMMU_DOMAIN_BLOCKED) > + return NULL; > + > + dom = kzalloc(sizeof(*dom), GFP_KERNEL); > + if (!dom) > + return NULL; > + > + dom->geometry.aperture_start = 0; > + dom->geometry.aperture_end = ~0ULL; > + dom->geometry.force_aperture = true; A blocked domain doesn't really have an aperture, all DMA is rejected, so I think these can just be deleted and left at zero. Generally I'm suggesting drivers just use a static singleton instance for the blocked domain instead of the allocation like this, but that is a very minor nit. > +static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev) > +{ > + struct pci_dev *pdev; > + struct pci_controller *hose; > + > + /* Weirdly iommu_device_register() assigns the same ops to all buses */ > + if (!dev_is_pci(dev)) > + return ERR_PTR(-EPERM); Less "weirdly", more by design. The iommu driver should check if the given struct device is supported or not, it isn't really a bus specific operation. > +static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev) > +{ > + struct pci_controller *hose; > + struct pci_dev *pdev; > + > + /* Weirdly iommu_device_register() assigns the same ops to all buses */ > + if (!dev_is_pci(dev)) > + return ERR_PTR(-EPERM); This doesn't need repeating, if probe_device() fails then this will never be called. > +static int spapr_tce_iommu_attach_dev(struct iommu_domain *dom, > + struct device *dev) > +{ > + struct iommu_group *grp = iommu_group_get(dev); > + struct iommu_table_group *table_group; > + int ret = -EINVAL; > + > + if (!grp) > + return -ENODEV; > + > + table_group = iommu_group_get_iommudata(grp); > + > + if (dom->type == IOMMU_DOMAIN_BLOCKED) > + ret = table_group->ops->take_ownership(table_group); Ideally there shouldn't be dom->type checks like this. The blocking domain should have its own iommu_domain_ops that only process the blocking operation. Ie call this like spapr_tce_iommu_blocking_attach_dev() Instead of having a "default_domain_ops" leave it NULL and create a spapr_tce_blocking_domain_ops with these two functions and assign it to domain->ops when creating. Then it is really clear these functions are only called for the DOMAIN_BLOCKED type and you don't need to check it. > +static void spapr_tce_iommu_detach_dev(struct iommu_domain *dom, > + struct device *dev) > +{ > + struct iommu_group *grp = iommu_group_get(dev); > + struct iommu_table_group *table_group; > + > + table_group = iommu_group_get_iommudata(grp); > + WARN_ON(dom->type != IOMMU_DOMAIN_BLOCKED); > + table_group->ops->release_ownership(table_group); > +} Ditto > +struct iommu_group *pSeries_pci_device_group(struct pci_controller *hose, > + struct pci_dev *pdev) > +{ > + struct device_node *pdn, *dn = pdev->dev.of_node; > + struct iommu_group *grp; > + struct pci_dn *pci; > + > + pdn = pci_dma_find(dn, NULL); > + if (!pdn || !PCI_DN(pdn)) > + return ERR_PTR(-ENODEV); > + > + pci = PCI_DN(pdn); > + if (!pci->table_group) > + return ERR_PTR(-ENODEV); > + > + grp = pci->table_group->group; > + if (!grp) > + return ERR_PTR(-ENODEV); > + > + return iommu_group_ref_get(grp); Not for this series, but this is kind of backwards, the driver specific data (ie the table_group) should be in iommu_group_get_iommudata()... > diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c > index 8a65ea61744c..3b53b466e49b 100644 > --- a/drivers/vfio/vfio_iommu_spapr_tce.c > +++ b/drivers/vfio/vfio_iommu_spapr_tce.c > @@ -1152,8 +1152,6 @@ static void tce_iommu_release_ownership(struct tce_container *container, > for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) > if (container->tables[i]) > table_group->ops->unset_window(table_group, i); > - > - table_group->ops->release_ownership(table_group); > } > > static long tce_iommu_take_ownership(struct tce_container *container, > @@ -1161,10 +1159,6 @@ static long tce_iommu_take_ownership(struct tce_container *container, > { > long i, ret = 0; > > - ret = table_group->ops->take_ownership(table_group); > - if (ret) > - return ret; > - > /* Set all windows to the new group */ > for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { > struct iommu_table *tbl = container->tables[i]; > @@ -1183,8 +1177,6 @@ static long tce_iommu_take_ownership(struct tce_container *container, > for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) > table_group->ops->unset_window(table_group, i); > > - table_group->ops->release_ownership(table_group); > - This is great, makes alot of sense. Anyhow, it all looks fine to me as is even: Reviewed-by: Jason Gunthorpe <jgg@xxxxxxxxxx> Jason