On Fri, 28 Oct 2016 15:33:58 +0800 Jike Song <jike.song@xxxxxxxxx> wrote: > On 10/27/2016 05:29 AM, Kirti Wankhede wrote: > > Added blocking notifier to IOMMU TYPE1 driver to notify vendor drivers > > about DMA_UNMAP. > > Exported two APIs vfio_register_notifier() and vfio_unregister_notifier(). > > Vendor driver should register notifer using these APIs. > > Vendor driver should use VFIO_IOMMU_NOTIFY_DMA_UNMAP action to invalidate > > mappings. > > > > Signed-off-by: Kirti Wankhede <kwankhede@xxxxxxxxxx> > > Signed-off-by: Neo Jia <cjia@xxxxxxxxxx> > > Change-Id: I5910d0024d6be87f3e8d3e0ca0eaeaaa0b17f271 > > --- > > drivers/vfio/vfio.c | 73 +++++++++++++++++++++++++++++++++ > > drivers/vfio/vfio_iommu_type1.c | 89 ++++++++++++++++++++++++++++++++++++----- > > include/linux/vfio.h | 11 +++++ > > 3 files changed, 163 insertions(+), 10 deletions(-) > > > > diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c > > index 28b50ca14c52..ff05ac6b1e90 100644 > > --- a/drivers/vfio/vfio.c > > +++ b/drivers/vfio/vfio.c > > @@ -1891,6 +1891,79 @@ err_unpin_pages: > > } > > EXPORT_SYMBOL(vfio_unpin_pages); > > > > +int vfio_register_notifier(struct device *dev, struct notifier_block *nb) > > +{ > > + struct vfio_container *container; > > + struct vfio_group *group; > > + struct vfio_iommu_driver *driver; > > + ssize_t ret; > > + > > + if (!dev || !nb) > > + return -EINVAL; > > + > > + group = vfio_group_get_from_dev(dev); > > + if (IS_ERR(group)) > > + return PTR_ERR(group); > > + > > + ret = vfio_group_add_container_user(group); > > + if (ret) > > + goto err_register_nb; > > + > > + container = group->container; > > + down_read(&container->group_lock); > > + > > + driver = container->iommu_driver; > > + if (likely(driver && driver->ops->register_notifier)) > > + ret = driver->ops->register_notifier(container->iommu_data, nb); > > + else > > + ret = -EINVAL; > > + > > + up_read(&container->group_lock); > > + vfio_group_try_dissolve_container(group); > > + > > +err_register_nb: > > + vfio_group_put(group); > > + return ret; > > +} > > +EXPORT_SYMBOL(vfio_register_notifier); > > + > > +int vfio_unregister_notifier(struct device *dev, struct notifier_block *nb) > > +{ > > + struct vfio_container *container; > > + struct vfio_group *group; > > + struct vfio_iommu_driver *driver; > > + ssize_t ret; > > + > > + if (!dev || !nb) > > + return -EINVAL; > > + > > + group = vfio_group_get_from_dev(dev); > > + if (IS_ERR(group)) > > + return PTR_ERR(group); > > + > > + ret = vfio_group_add_container_user(group); > > + if (ret) > > + goto err_unregister_nb; > > + > > + container = group->container; > > + down_read(&container->group_lock); > > + > > + driver = container->iommu_driver; > > + if (likely(driver && driver->ops->unregister_notifier)) > > + ret = driver->ops->unregister_notifier(container->iommu_data, > > + nb); > > + else > > + ret = -EINVAL; > > + > > + up_read(&container->group_lock); > > + vfio_group_try_dissolve_container(group); > > + > > +err_unregister_nb: > > + vfio_group_put(group); > > + return ret; > > +} > > +EXPORT_SYMBOL(vfio_unregister_notifier); > > + > > /** > > * Module/class support > > */ > > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > > index 5add11a147e1..a4bd331ac0fd 100644 > > --- a/drivers/vfio/vfio_iommu_type1.c > > +++ b/drivers/vfio/vfio_iommu_type1.c > > @@ -37,6 +37,7 @@ > > #include <linux/vfio.h> > > #include <linux/workqueue.h> > > #include <linux/mdev.h> > > +#include <linux/notifier.h> > > > > #define DRIVER_VERSION "0.2" > > #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@xxxxxxxxxx>" > > @@ -59,6 +60,7 @@ struct vfio_iommu { > > struct vfio_domain *external_domain; /* domain for external user */ > > struct mutex lock; > > struct rb_root dma_list; > > + struct blocking_notifier_head notifier; > > bool v2; > > bool nesting; > > }; > > @@ -549,7 +551,8 @@ static long vfio_iommu_type1_pin_pages(void *iommu_data, > > > > mutex_lock(&iommu->lock); > > > > - if (!iommu->external_domain) { > > + /* Fail if notifier list is empty */ > > + if ((!iommu->external_domain) || (!iommu->notifier.head)) { > > ret = -EINVAL; > > goto pin_done; > > } > > @@ -768,6 +771,50 @@ static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu) > > return bitmap; > > } > > > > +/* > > + * This function finds pfn in domain->external_addr_space->pfn_list for given > > + * iova range. If pfn exist, notify pfn to registered notifier list. On > > + * receiving notifier callback, vendor driver should invalidate the mapping and > > + * call vfio_unpin_pages() to unpin this pfn. With that vfio_pfn for this pfn > > + * gets removed from rb tree of pfn_list. That re-arranges rb tree, so while > > + * searching for next vfio_pfn in rb tree, start search from first node again. > > + * If any vendor driver doesn't unpin that pfn, vfio_pfn would not get removed > > + * from rb tree and so in next search vfio_pfn would be same as previous > > + * vfio_pfn. In that case, exit from loop. > > + */ > > +static void vfio_notifier_call_chain(struct vfio_iommu *iommu, > > + struct vfio_iommu_type1_dma_unmap *unmap) > > +{ > > + struct vfio_domain *domain = iommu->external_domain; > > + struct rb_node *n; > > + struct vfio_pfn *vpfn = NULL, *prev_vpfn; > > + > > + do { > > + prev_vpfn = vpfn; > > + mutex_lock(&domain->external_addr_space->pfn_list_lock); > > + > > + n = rb_first(&domain->external_addr_space->pfn_list); > > + > > + for (; n; n = rb_next(n), vpfn = NULL) { > > + vpfn = rb_entry(n, struct vfio_pfn, node); > > + > > + if ((vpfn->iova >= unmap->iova) && > > + (vpfn->iova < unmap->iova + unmap->size)) > > + break; > > + } > > + > > + mutex_unlock(&domain->external_addr_space->pfn_list_lock); > > + > > + /* Notify any listeners about DMA_UNMAP */ > > + if (vpfn) > > + blocking_notifier_call_chain(&iommu->notifier, > > + VFIO_IOMMU_NOTIFY_DMA_UNMAP, > > + &vpfn->pfn); > > Hi Kirti, > > The information carried by notifier is only a pfn. > > Since your pin/unpin interfaces design, it's the vendor driver who should > guarantee pin/unpin same times. To achieve that, the vendor driver must > cache it's iova->pfn mapping on its side, to avoid pinning a same page > for multiple times. > > With the notifier carrying only a pfn, to find the iova by this pfn, > the vendor driver must *also* keep a reverse-mapping. That's a bit > too much. > > Since the vendor could also suffer from IOMMU-compatible problem, > which means a local cache is always helpful, so I'd like to have the > iova carried to the notifier. > > What'd you say? I agree, the pfn is not unique, multiple guest pfns (iovas) might be backed by the same host pfn. DMA_UNMAP calls are based on iova, the notifier through to the vendor driver must be based on the same. Thanks, Alex > > + } while (vpfn && (prev_vpfn != vpfn)); > > + > > + WARN_ON(vpfn); > > +} > > + > > static int vfio_dma_do_unmap(struct vfio_iommu *iommu, > > struct vfio_iommu_type1_dma_unmap *unmap) > > { > > @@ -844,6 +891,9 @@ unlock: > > /* Report how much was unmapped */ > > unmap->size = unmapped; > > > > + if (unmapped && iommu->external_domain) > > + vfio_notifier_call_chain(iommu, unmap); > > + > > return ret; > > } > > > > @@ -1418,6 +1468,7 @@ static void *vfio_iommu_type1_open(unsigned long arg) > > INIT_LIST_HEAD(&iommu->domain_list); > > iommu->dma_list = RB_ROOT; > > mutex_init(&iommu->lock); > > + BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier); > > > > return iommu; > > } > > @@ -1555,16 +1606,34 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, > > return -ENOTTY; > > } > > > > +static int vfio_iommu_type1_register_notifier(void *iommu_data, > > + struct notifier_block *nb) > > +{ > > + struct vfio_iommu *iommu = iommu_data; > > + > > + return blocking_notifier_chain_register(&iommu->notifier, nb); > > +} > > + > > +static int vfio_iommu_type1_unregister_notifier(void *iommu_data, > > + struct notifier_block *nb) > > +{ > > + struct vfio_iommu *iommu = iommu_data; > > + > > + return blocking_notifier_chain_unregister(&iommu->notifier, nb); > > +} > > + > > static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = { > > - .name = "vfio-iommu-type1", > > - .owner = THIS_MODULE, > > - .open = vfio_iommu_type1_open, > > - .release = vfio_iommu_type1_release, > > - .ioctl = vfio_iommu_type1_ioctl, > > - .attach_group = vfio_iommu_type1_attach_group, > > - .detach_group = vfio_iommu_type1_detach_group, > > - .pin_pages = vfio_iommu_type1_pin_pages, > > - .unpin_pages = vfio_iommu_type1_unpin_pages, > > + .name = "vfio-iommu-type1", > > + .owner = THIS_MODULE, > > + .open = vfio_iommu_type1_open, > > + .release = vfio_iommu_type1_release, > > + .ioctl = vfio_iommu_type1_ioctl, > > + .attach_group = vfio_iommu_type1_attach_group, > > + .detach_group = vfio_iommu_type1_detach_group, > > + .pin_pages = vfio_iommu_type1_pin_pages, > > + .unpin_pages = vfio_iommu_type1_unpin_pages, > > + .register_notifier = vfio_iommu_type1_register_notifier, > > + .unregister_notifier = vfio_iommu_type1_unregister_notifier, > > }; > > > > static int __init vfio_iommu_type1_init(void) > > diff --git a/include/linux/vfio.h b/include/linux/vfio.h > > index 0609a2052846..4c91ce8bfaeb 100644 > > --- a/include/linux/vfio.h > > +++ b/include/linux/vfio.h > > @@ -80,6 +80,10 @@ struct vfio_iommu_driver_ops { > > unsigned long *phys_pfn); > > long (*unpin_pages)(void *iommu_data, unsigned long *pfn, > > long npage); > > + int (*register_notifier)(void *iommu_data, > > + struct notifier_block *nb); > > + int (*unregister_notifier)(void *iommu_data, > > + struct notifier_block *nb); > > }; > > > > extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops); > > @@ -137,6 +141,13 @@ extern long vfio_pin_pages(struct device *dev, unsigned long *user_pfn, > > extern long vfio_unpin_pages(struct device *dev, unsigned long *pfn, > > long npage); > > > > +#define VFIO_IOMMU_NOTIFY_DMA_UNMAP 1 > > + > > +extern int vfio_register_notifier(struct device *dev, > > + struct notifier_block *nb); > > + > > +extern int vfio_unregister_notifier(struct device *dev, > > + struct notifier_block *nb); > > /* > > * IRQfd - generic > > */ > > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html