On Thu, 19 Mar 2020 01:11:14 +0530
Kirti Wankhede <kwankhede@xxxxxxxxxx> wrote:

> Added a check such that only singleton IOMMU groups can pin pages.
> From the point when a vendor driver pins any pages, consider the
> IOMMU group's dirty page scope to be limited to pinned pages.
>
> To avoid walking the list often, added a flag,
> pinned_page_dirty_scope, to indicate whether the dirty page scope of
> all vfio_groups of each vfio_domain in the domain_list is limited to
> pinned pages. This flag is updated on the first pinned-pages request
> for that IOMMU group and on attaching/detaching a group.
>
> Signed-off-by: Kirti Wankhede <kwankhede@xxxxxxxxxx>
> Reviewed-by: Neo Jia <cjia@xxxxxxxxxx>
> ---
>  drivers/vfio/vfio.c             | 13 +++++--
>  drivers/vfio/vfio_iommu_type1.c | 77 +++++++++++++++++++++++++++++++++++++++--
>  include/linux/vfio.h            |  4 ++-
>  3 files changed, 87 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index 210fcf426643..311b5e4e111e 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -85,6 +85,7 @@ struct vfio_group {
>  	atomic_t			opened;
>  	wait_queue_head_t		container_q;
>  	bool				noiommu;
> +	unsigned int			dev_counter;
>  	struct kvm			*kvm;
>  	struct blocking_notifier_head	notifier;
>  };
> @@ -555,6 +556,7 @@ struct vfio_device *vfio_group_create_device(struct vfio_group *group,
>
>  	mutex_lock(&group->device_lock);
>  	list_add(&device->group_next, &group->device_list);
> +	group->dev_counter++;
>  	mutex_unlock(&group->device_lock);
>
>  	return device;
> @@ -567,6 +569,7 @@ static void vfio_device_release(struct kref *kref)
>  	struct vfio_group *group = device->group;
>
>  	list_del(&device->group_next);
> +	group->dev_counter--;
>  	mutex_unlock(&group->device_lock);
>
>  	dev_set_drvdata(device->dev, NULL);
> @@ -1933,6 +1936,9 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
>  	if (!group)
>  		return -ENODEV;
>
> +	if (group->dev_counter > 1)
> +		return -EINVAL;
> +
>  	ret = vfio_group_add_container_user(group);
>  	if (ret)
>  		goto err_pin_pages;
> @@ -1940,7 +1946,8 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
>  	container = group->container;
>  	driver = container->iommu_driver;
>  	if (likely(driver && driver->ops->pin_pages))
> -		ret = driver->ops->pin_pages(container->iommu_data, user_pfn,
> +		ret = driver->ops->pin_pages(container->iommu_data,
> +					     group->iommu_group, user_pfn,
>  					     npage, prot, phys_pfn);
>  	else
>  		ret = -ENOTTY;
> @@ -2038,8 +2045,8 @@ int vfio_group_pin_pages(struct vfio_group *group,
>  	driver = container->iommu_driver;
>  	if (likely(driver && driver->ops->pin_pages))
>  		ret = driver->ops->pin_pages(container->iommu_data,
> -					     user_iova_pfn, npage,
> -					     prot, phys_pfn);
> +					     group->iommu_group, user_iova_pfn,
> +					     npage, prot, phys_pfn);
>  	else
>  		ret = -ENOTTY;
>
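Two small notes on the vfio.c wrapper while I'm here. First, the new
-EINVAL return above happens after vfio_group_get_from_dev() but skips
the vfio_group_put() performed at err_pin_pages, so it looks like it
leaks the group reference; setting ret and jumping to err_pin_pages
would be cleaner. Second, for anyone following the vendor driver side,
the exported signature is unchanged, callers only gain a new failure
mode. A rough sketch of the caller's view (only the vfio_pin_pages()
signature comes from the code above; the my_mdev_* names are invented
for illustration, and the sketch assumes linux/vfio.h, linux/mdev.h
and linux/iommu.h):

	struct my_mdev_state {		/* invented for this sketch */
		struct mdev_device *mdev;
	};

	/*
	 * Hypothetical mdev vendor driver helper pinning one guest pfn
	 * through the container.  Everything named my_mdev_* is made up.
	 */
	static int my_mdev_pin_guest_pfn(struct my_mdev_state *mstate,
					 unsigned long gfn,
					 unsigned long *host_pfn)
	{
		unsigned long user_pfn = gfn;
		int ret;

		/* Now fails with -EINVAL if the group isn't a singleton */
		ret = vfio_pin_pages(mdev_dev(mstate->mdev), &user_pfn, 1,
				     IOMMU_READ | IOMMU_WRITE, host_pfn);

		/* vfio_pin_pages() returns the number of pages pinned */
		return ret == 1 ? 0 : (ret < 0 ? ret : -EFAULT);
	}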
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 912629320719..deec09f4b0f6 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -72,6 +72,7 @@ struct vfio_iommu {
>  	bool			v2;
>  	bool			nesting;
>  	bool			dirty_page_tracking;
> +	bool			pinned_page_dirty_scope;
>  };
>
>  struct vfio_domain {
> @@ -99,6 +100,7 @@ struct vfio_group {
>  	struct iommu_group	*iommu_group;
>  	struct list_head	next;
>  	bool			mdev_group;	/* An mdev group */
> +	bool			pinned_page_dirty_scope;
>  };
>
>  struct vfio_iova {
> @@ -132,6 +134,10 @@ struct vfio_regions {
>  static int put_pfn(unsigned long pfn, int prot);
>  static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu);
>
> +static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu,
> +					struct iommu_group *iommu_group);
> +
> +static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu);
>  /*
>   * This code handles mapping and unmapping of user data buffers
>   * into DMA'ble space using the IOMMU
> @@ -556,11 +562,13 @@ static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova,
>  }
>
>  static int vfio_iommu_type1_pin_pages(void *iommu_data,
> +				      struct iommu_group *iommu_group,
>  				      unsigned long *user_pfn,
>  				      int npage, int prot,
>  				      unsigned long *phys_pfn)
>  {
>  	struct vfio_iommu *iommu = iommu_data;
> +	struct vfio_group *group;
>  	int i, j, ret;
>  	unsigned long remote_vaddr;
>  	struct vfio_dma *dma;
> @@ -630,8 +638,14 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
>  				(vpfn->iova - dma->iova) >> pgshift, 1);
>  		}
>  	}
> -
>  	ret = i;
> +
> +	group = vfio_iommu_find_iommu_group(iommu, iommu_group);
> +	if (!group->pinned_page_dirty_scope) {
> +		group->pinned_page_dirty_scope = true;
> +		update_pinned_page_dirty_scope(iommu);
> +	}
> +
>  	goto pin_done;
>
>  pin_unwind:
> @@ -913,8 +927,11 @@ static int vfio_iova_dirty_bitmap(struct vfio_iommu *iommu, dma_addr_t iova,
>  	npages = dma->size >> pgshift;
>  	bitmap_size = DIRTY_BITMAP_BYTES(npages);
>
> -	/* mark all pages dirty if all pages are pinned and mapped. */
> -	if (dma->iommu_mapped)
> +	/*
> +	 * mark all pages dirty if any IOMMU capable device is not able
> +	 * to report dirty pages and all pages are pinned and mapped.
> +	 */
> +	if (!iommu->pinned_page_dirty_scope && dma->iommu_mapped)
>  		bitmap_set(dma->bitmap, 0, npages);
>
>  	if (copy_to_user((void __user *)bitmap, dma->bitmap, bitmap_size))
> @@ -1393,6 +1410,51 @@ static struct vfio_group *find_iommu_group(struct vfio_domain *domain,
>  	return NULL;
>  }
>
> +static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu,
> +					struct iommu_group *iommu_group)
> +{
> +	struct vfio_domain *domain;
> +	struct vfio_group *group = NULL;
> +
> +	list_for_each_entry(domain, &iommu->domain_list, next) {
> +		group = find_iommu_group(domain, iommu_group);
> +		if (group)
> +			return group;
> +	}
> +
> +	if (iommu->external_domain)
> +		group = find_iommu_group(iommu->external_domain, iommu_group);
> +
> +	return group;
> +}
> +
> +static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu)
> +{
> +	struct vfio_domain *domain;
> +	struct vfio_group *group;
> +
> +	list_for_each_entry(domain, &iommu->domain_list, next) {
> +		list_for_each_entry(group, &domain->group_list, next) {
> +			if (!group->pinned_page_dirty_scope) {
> +				iommu->pinned_page_dirty_scope = false;
> +				return;
> +			}
> +		}
> +	}
> +
> +	if (iommu->external_domain) {
> +		domain = iommu->external_domain;
> +		list_for_each_entry(group, &domain->group_list, next) {
> +			if (!group->pinned_page_dirty_scope) {
> +				iommu->pinned_page_dirty_scope = false;
> +				return;
> +			}
> +		}
> +	}
> +
> +	iommu->pinned_page_dirty_scope = true;
> +}
> +
>  static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions,
>  				  phys_addr_t *base)
>  {
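The aggregate rule above is effectively an AND across every group in
the container: a single group that hasn't limited itself to pinning
demotes the whole container. In case it helps review, a throwaway
userspace model of just that invariant (the toy_* names are mine,
nothing below is in the patch):

	#include <stdbool.h>
	#include <stdio.h>

	/* Toy stand-in for vfio_group: only the flag this patch adds. */
	struct toy_group {
		bool pinned_page_dirty_scope;
	};

	/*
	 * Mirrors update_pinned_page_dirty_scope(): the container-wide
	 * flag is true only if every attached group dirties memory
	 * through the pinning interface alone.
	 */
	static bool toy_container_scope(const struct toy_group *g, int n)
	{
		for (int i = 0; i < n; i++)
			if (!g[i].pinned_page_dirty_scope)
				return false;
		return true;
	}

	int main(void)
	{
		struct toy_group g[2] = { { true }, { false } };

		/* prints 0: one passthrough group demotes the container */
		printf("%d\n", toy_container_scope(g, 2));

		g[1].pinned_page_dirty_scope = true; /* first pin request */

		/* prints 1: reporting can be limited to pinned pages */
		printf("%d\n", toy_container_scope(g, 2));
		return 0;
	}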
> @@ -1799,6 +1861,9 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
>
>  		list_add(&group->next,
>  			 &iommu->external_domain->group_list);
> +		group->pinned_page_dirty_scope = true;
> +		if (!iommu->pinned_page_dirty_scope)
> +			update_pinned_page_dirty_scope(iommu);

A comment above this would be good since this wasn't entirely obvious,
maybe:

	/*
	 * Non-iommu backed group cannot dirty memory directly, it can
	 * only use interfaces that provide dirty tracking.  The iommu
	 * scope can only be promoted with the addition of a dirty
	 * tracking group.
	 */

>  		mutex_unlock(&iommu->lock);
>
>  		return 0;
> @@ -1921,6 +1986,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
>  done:
>  	/* Delete the old one and insert new iova list */
>  	vfio_iommu_iova_insert_copy(iommu, &iova_copy);
> +	iommu->pinned_page_dirty_scope = false;

Likewise here:

	/*
	 * An iommu backed group can dirty memory directly and therefore
	 * demotes the iommu scope until it declares itself dirty
	 * tracking capable via the page pinning interface.
	 */

>  	mutex_unlock(&iommu->lock);
>  	vfio_iommu_resv_free(&group_resv_regions);
>
> @@ -2073,6 +2139,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
>  	struct vfio_iommu *iommu = iommu_data;
>  	struct vfio_domain *domain;
>  	struct vfio_group *group;
> +	bool update_dirty_scope = false;
>  	LIST_HEAD(iova_copy);
>
>  	mutex_lock(&iommu->lock);
> @@ -2080,6 +2147,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
>  	if (iommu->external_domain) {
>  		group = find_iommu_group(iommu->external_domain, iommu_group);
>  		if (group) {
> +			update_dirty_scope = !group->pinned_page_dirty_scope;
>  			list_del(&group->next);
>  			kfree(group);
>
> @@ -2109,6 +2177,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
>  			continue;
>
>  		vfio_iommu_detach_group(domain, group);
> +		update_dirty_scope = !group->pinned_page_dirty_scope;
>  		list_del(&group->next);
>  		kfree(group);
>  		/*
> @@ -2139,6 +2208,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
>  	vfio_iommu_iova_free(&iova_copy);
>
>  detach_group_done:
> +	if (update_dirty_scope)
> +		update_pinned_page_dirty_scope(iommu);

And one more:

	/*
	 * Removal of a group without dirty tracking may allow the
	 * iommu scope to be promoted.
	 */

>  	mutex_unlock(&iommu->lock);
>  }
>
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index be2bd358b952..702e1d7b6e8b 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -72,7 +72,9 @@ struct vfio_iommu_driver_ops {
>  				     struct iommu_group *group);
>  	void	(*detach_group)(void *iommu_data,
>  				struct iommu_group *group);
> -	int	(*pin_pages)(void *iommu_data, unsigned long *user_pfn,
> +	int	(*pin_pages)(void *iommu_data,
> +			     struct iommu_group *group,
> +			     unsigned long *user_pfn,
>  			     int npage, int prot,
>  			     unsigned long *phys_pfn);
>  	int	(*unpin_pages)(void *iommu_data,
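To tie the three suggested comments together, the lifecycle I'm
expecting is: a non-iommu backed (mdev) group attaches promoted, an
iommu backed group attaches demoted and is only promoted by its first
pin request, and detaching a non-reporting group can promote whatever
remains. A throwaway userspace sketch of those transitions, again not
anything from the patch (toy_* names invented):

	#include <assert.h>
	#include <stdbool.h>

	/* Toy container state: counts instead of walking group lists. */
	struct toy_iommu {
		int ngroups;
		int nreporting;	/* groups limited to pinned-page dirtying */
	};

	static bool toy_scope(const struct toy_iommu *c)
	{
		return c->ngroups == c->nreporting;
	}

	int main(void)
	{
		struct toy_iommu c = { 0, 0 };

		c.ngroups++; c.nreporting++;	/* mdev group attaches */
		assert(toy_scope(&c));		/* promoted */

		c.ngroups++;		/* iommu backed group attaches */
		assert(!toy_scope(&c));		/* demoted */

		c.nreporting++;			/* its first pin request */
		assert(toy_scope(&c));		/* promoted again */

		c.ngroups++;		/* another passthrough group */
		assert(!toy_scope(&c));
		c.ngroups--;		/* ...and its detach */
		assert(toy_scope(&c));		/* promoted on removal */
		return 0;
	}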