Un-inline the domain specific logic from the attach/detach_group ops into two paired functions vfio_iommu_alloc_attach_domain() and vfio_iommu_detach_destroy_domain() that strictly deal with creating and destroying struct vfio_domains. Add the logic to check for EMEDIUMTYPE return code of iommu_attach_group() and avoid the extra domain allocations and attach/detach sequences of the old code. This allows properly detecting an actual attach error, like -ENOMEM, vs treating all attach errors as an incompatible domain. Reviewed-by: Kevin Tian <kevin.tian@xxxxxxxxx> Co-developed-by: Jason Gunthorpe <jgg@xxxxxxxxxx> Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx> Signed-off-by: Nicolin Chen <nicolinc@xxxxxxxxxx> --- drivers/vfio/vfio_iommu_type1.c | 334 +++++++++++++++++--------------- 1 file changed, 180 insertions(+), 154 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 3b63a5a237c9..51d29b8780cd 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -2163,14 +2163,179 @@ static int vfio_iommu_domain_alloc(struct device *dev, void *data) return 1; /* Don't iterate */ } +static struct vfio_domain * +vfio_iommu_alloc_attach_domain(struct vfio_iommu *iommu, + struct vfio_iommu_group *group, + struct list_head *group_resv_regions) +{ + struct iommu_domain *new_domain; + struct vfio_domain *domain; + phys_addr_t resv_msi_base; + int ret = 0; + + /* Try to match an existing compatible domain */ + list_for_each_entry (domain, &iommu->domain_list, next) { + ret = iommu_attach_group(domain->domain, group->iommu_group); + /* -EMEDIUMTYPE means an incompatible domain, so try next one */ + if (ret == -EMEDIUMTYPE) + continue; + if (ret) + return ERR_PTR(ret); + goto done; + } + + /* + * Going via the iommu_group iterator avoids races, and trivially gives + * us a representative device for the IOMMU API call. We don't actually + * want to iterate beyond the first device (if any). + */ + iommu_group_for_each_dev(group->iommu_group, &new_domain, + vfio_iommu_domain_alloc); + if (!new_domain) + return ERR_PTR(-EIO); + + if (iommu->nesting) { + ret = iommu_enable_nesting(new_domain); + if (ret) + goto out_free_iommu_domain; + } + + ret = iommu_attach_group(new_domain, group->iommu_group); + if (ret) + goto out_free_iommu_domain; + + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (!domain) { + ret = -ENOMEM; + goto out_detach; + } + + domain->domain = new_domain; + vfio_test_domain_fgsp(domain); + + /* + * If the IOMMU can block non-coherent operations (ie PCIe TLPs with + * no-snoop set) then VFIO always turns this feature on because on Intel + * platforms it optimizes KVM to disable wbinvd emulation. + */ + if (new_domain->ops->enforce_cache_coherency) + domain->enforce_cache_coherency = + new_domain->ops->enforce_cache_coherency(new_domain); + + /* replay mappings on new domains */ + ret = vfio_iommu_replay(iommu, domain); + if (ret) + goto out_free_domain; + + if (vfio_iommu_has_sw_msi(group_resv_regions, &resv_msi_base)) { + ret = iommu_get_msi_cookie(domain->domain, resv_msi_base); + if (ret && ret != -ENODEV) + goto out_free_domain; + } + + INIT_LIST_HEAD(&domain->group_list); + list_add(&domain->next, &iommu->domain_list); + vfio_update_pgsize_bitmap(iommu); + +done: + list_add(&group->next, &domain->group_list); + + /* + * An iommu backed group can dirty memory directly and therefore + * demotes the iommu scope until it declares itself dirty tracking + * capable via the page pinning interface. + */ + iommu->num_non_pinned_groups++; + + return domain; + +out_free_domain: + kfree(domain); +out_detach: + iommu_detach_group(new_domain, group->iommu_group); +out_free_iommu_domain: + iommu_domain_free(new_domain); + return ERR_PTR(ret); +} + +static void vfio_iommu_unmap_unpin_all(struct vfio_iommu *iommu) +{ + struct rb_node *node; + + while ((node = rb_first(&iommu->dma_list))) + vfio_remove_dma(iommu, rb_entry(node, struct vfio_dma, node)); +} + +static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu) +{ + struct rb_node *n, *p; + + n = rb_first(&iommu->dma_list); + for (; n; n = rb_next(n)) { + struct vfio_dma *dma; + long locked = 0, unlocked = 0; + + dma = rb_entry(n, struct vfio_dma, node); + unlocked += vfio_unmap_unpin(iommu, dma, false); + p = rb_first(&dma->pfn_list); + for (; p; p = rb_next(p)) { + struct vfio_pfn *vpfn = rb_entry(p, struct vfio_pfn, + node); + + if (!is_invalid_reserved_pfn(vpfn->pfn)) + locked++; + } + vfio_lock_acct(dma, locked - unlocked, true); + } +} + +static void vfio_iommu_detach_destroy_domain(struct vfio_domain *domain, + struct vfio_iommu *iommu, + struct vfio_iommu_group *group) +{ + iommu_detach_group(domain->domain, group->iommu_group); + list_del(&group->next); + if (!list_empty(&domain->group_list)) + goto out_dirty; + + /* + * Group ownership provides privilege, if the group list is empty, the + * domain goes away. If it's the last domain with iommu and external + * domain doesn't exist, then all the mappings go away too. If it's the + * last domain with iommu and external domain exist, update accounting + */ + if (list_is_singular(&iommu->domain_list)) { + if (list_empty(&iommu->emulated_iommu_groups)) { + WARN_ON(!list_empty(&iommu->device_list)); + vfio_iommu_unmap_unpin_all(iommu); + } else { + vfio_iommu_unmap_unpin_reaccount(iommu); + } + } + iommu_domain_free(domain->domain); + list_del(&domain->next); + kfree(domain); + vfio_update_pgsize_bitmap(iommu); + +out_dirty: + /* + * Removal of a group without dirty tracking may allow the iommu scope + * to be promoted. + */ + if (!group->pinned_page_dirty_scope) { + iommu->num_non_pinned_groups--; + if (iommu->dirty_page_tracking) + vfio_iommu_populate_bitmap_full(iommu); + } +} + static int vfio_iommu_type1_attach_group(void *iommu_data, struct iommu_group *iommu_group, enum vfio_group_type type) { struct vfio_iommu *iommu = iommu_data; struct vfio_iommu_group *group; - struct vfio_domain *domain, *d; - bool resv_msi, msi_remap; - phys_addr_t resv_msi_base = 0; + struct vfio_domain *domain; + bool msi_remap; struct iommu_domain_geometry *geo; LIST_HEAD(iova_copy); LIST_HEAD(group_resv_regions); @@ -2201,32 +2366,17 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, goto out_unlock; } - ret = -ENOMEM; - domain = kzalloc(sizeof(*domain), GFP_KERNEL); - if (!domain) + ret = iommu_get_group_resv_regions(iommu_group, &group_resv_regions); + if (ret) goto out_free_group; - /* - * Going via the iommu_group iterator avoids races, and trivially gives - * us a representative device for the IOMMU API call. We don't actually - * want to iterate beyond the first device (if any). - */ - ret = -EIO; - iommu_group_for_each_dev(iommu_group, &domain->domain, - vfio_iommu_domain_alloc); - if (!domain->domain) - goto out_free_domain; - - if (iommu->nesting) { - ret = iommu_enable_nesting(domain->domain); - if (ret) - goto out_domain; + domain = vfio_iommu_alloc_attach_domain(iommu, group, + &group_resv_regions); + if (IS_ERR(domain)) { + ret = PTR_ERR(domain); + goto out_free_group; } - ret = iommu_attach_group(domain->domain, group->iommu_group); - if (ret) - goto out_domain; - /* Get aperture info */ geo = &domain->domain->geometry; if (vfio_iommu_aper_conflict(iommu, geo->aperture_start, @@ -2235,10 +2385,6 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, goto out_detach; } - ret = iommu_get_group_resv_regions(iommu_group, &group_resv_regions); - if (ret) - goto out_detach; - if (vfio_iommu_resv_conflict(iommu, &group_resv_regions)) { ret = -EINVAL; goto out_detach; @@ -2262,11 +2408,6 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, if (ret) goto out_detach; - resv_msi = vfio_iommu_has_sw_msi(&group_resv_regions, &resv_msi_base); - - INIT_LIST_HEAD(&domain->group_list); - list_add(&group->next, &domain->group_list); - msi_remap = irq_domain_check_msi_remap() || iommu_group_for_each_dev(iommu_group, (void *)IOMMU_CAP_INTR_REMAP, vfio_iommu_device_capable); @@ -2278,107 +2419,25 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, goto out_detach; } - /* - * If the IOMMU can block non-coherent operations (ie PCIe TLPs with - * no-snoop set) then VFIO always turns this feature on because on Intel - * platforms it optimizes KVM to disable wbinvd emulation. - */ - if (domain->domain->ops->enforce_cache_coherency) - domain->enforce_cache_coherency = - domain->domain->ops->enforce_cache_coherency( - domain->domain); - - /* Try to match an existing compatible domain */ - list_for_each_entry(d, &iommu->domain_list, next) { - iommu_detach_group(domain->domain, group->iommu_group); - if (!iommu_attach_group(d->domain, group->iommu_group)) { - list_add(&group->next, &d->group_list); - iommu_domain_free(domain->domain); - kfree(domain); - goto done; - } - - ret = iommu_attach_group(domain->domain, group->iommu_group); - if (ret) - goto out_domain; - } - - vfio_test_domain_fgsp(domain); - - /* replay mappings on new domains */ - ret = vfio_iommu_replay(iommu, domain); - if (ret) - goto out_detach; - - if (resv_msi) { - ret = iommu_get_msi_cookie(domain->domain, resv_msi_base); - if (ret && ret != -ENODEV) - goto out_detach; - } - - list_add(&domain->next, &iommu->domain_list); - vfio_update_pgsize_bitmap(iommu); -done: /* Delete the old one and insert new iova list */ vfio_iommu_iova_insert_copy(iommu, &iova_copy); - /* - * An iommu backed group can dirty memory directly and therefore - * demotes the iommu scope until it declares itself dirty tracking - * capable via the page pinning interface. - */ - iommu->num_non_pinned_groups++; mutex_unlock(&iommu->lock); vfio_iommu_resv_free(&group_resv_regions); return 0; out_detach: - iommu_detach_group(domain->domain, group->iommu_group); -out_domain: - iommu_domain_free(domain->domain); - vfio_iommu_iova_free(&iova_copy); - vfio_iommu_resv_free(&group_resv_regions); -out_free_domain: - kfree(domain); + vfio_iommu_detach_destroy_domain(domain, iommu, group); out_free_group: kfree(group); out_unlock: mutex_unlock(&iommu->lock); + vfio_iommu_iova_free(&iova_copy); + vfio_iommu_resv_free(&group_resv_regions); return ret; } -static void vfio_iommu_unmap_unpin_all(struct vfio_iommu *iommu) -{ - struct rb_node *node; - - while ((node = rb_first(&iommu->dma_list))) - vfio_remove_dma(iommu, rb_entry(node, struct vfio_dma, node)); -} - -static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu) -{ - struct rb_node *n, *p; - - n = rb_first(&iommu->dma_list); - for (; n; n = rb_next(n)) { - struct vfio_dma *dma; - long locked = 0, unlocked = 0; - - dma = rb_entry(n, struct vfio_dma, node); - unlocked += vfio_unmap_unpin(iommu, dma, false); - p = rb_first(&dma->pfn_list); - for (; p; p = rb_next(p)) { - struct vfio_pfn *vpfn = rb_entry(p, struct vfio_pfn, - node); - - if (!is_invalid_reserved_pfn(vpfn->pfn)) - locked++; - } - vfio_lock_acct(dma, locked - unlocked, true); - } -} - /* * Called when a domain is removed in detach. It is possible that * the removed domain decided the iova aperture window. Modify the @@ -2493,45 +2552,12 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, group = find_iommu_group(domain, iommu_group); if (!group) continue; - - iommu_detach_group(domain->domain, group->iommu_group); - list_del(&group->next); - /* - * Group ownership provides privilege, if the group list is - * empty, the domain goes away. If it's the last domain with - * iommu and external domain doesn't exist, then all the - * mappings go away too. If it's the last domain with iommu and - * external domain exist, update accounting - */ - if (list_empty(&domain->group_list)) { - if (list_is_singular(&iommu->domain_list)) { - if (list_empty(&iommu->emulated_iommu_groups)) { - WARN_ON(!list_empty( - &iommu->device_list)); - vfio_iommu_unmap_unpin_all(iommu); - } else { - vfio_iommu_unmap_unpin_reaccount(iommu); - } - } - iommu_domain_free(domain->domain); - list_del(&domain->next); - kfree(domain); - vfio_iommu_aper_expand(iommu, &iova_copy); - vfio_update_pgsize_bitmap(iommu); - } - /* - * Removal of a group without dirty tracking may allow - * the iommu scope to be promoted. - */ - if (!group->pinned_page_dirty_scope) { - iommu->num_non_pinned_groups--; - if (iommu->dirty_page_tracking) - vfio_iommu_populate_bitmap_full(iommu); - } + vfio_iommu_detach_destroy_domain(domain, iommu, group); kfree(group); break; } + vfio_iommu_aper_expand(iommu, &iova_copy); if (!vfio_iommu_resv_refresh(iommu, &iova_copy)) vfio_iommu_iova_insert_copy(iommu, &iova_copy); else -- 2.17.1