Kernel access is the mode that VFIO "mdevs" use. In this case there is no
struct device and no IOMMU connection. iommufd acts as a record keeper for
accesses and returns the actual struct pages back to the caller to use
however they need, e.g. with kmap or the DMA API.

Each caller must create a struct iommufd_access with
iommufd_access_create(), similar to how iommufd_device_bind() works. Using
this struct the caller can access blocks of IOVA using
iommufd_access_pin_pages() or iommufd_access_rw().

Callers must provide a callback that immediately unpins any IOVA being
used within a range. This happens if userspace unmaps the IOVA under the
pin.

The implementation forwards the access requests directly to the identical
iopt infrastructure that manages the iopt_pages_user.

Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
---
Review notes:
 - iommufd_access_create() now returns ERR_CAST(access) when the object
   allocation fails instead of &access->pub computed from an ERR_PTR.
 - iommufd_access_create() now stores @ops; iommufd_access_notify_unmap()
   dereferences access->ops and would otherwise hit a NULL pointer.
 - Added kernel-doc for iommufd_access_create(), switched the
   iommufd_access_notify_unmap() kernel-doc to "@name:" syntax and fixed
   spelling in the newly added comments.

 drivers/iommu/iommufd/device.c          | 135 ++++++++++++++++++++++++
 drivers/iommu/iommufd/io_pagetable.c    |   7 +-
 drivers/iommu/iommufd/ioas.c            |   2 +
 drivers/iommu/iommufd/iommufd_private.h |   5 +
 drivers/iommu/iommufd/main.c            |   3 +
 include/linux/iommufd.h                 |  40 ++++++++
 6 files changed, 190 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 23b101db846f40..d34bdbcb84a40d 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -394,3 +394,138 @@ void iommufd_device_detach(struct iommufd_device *idev)
 	refcount_dec(&idev->obj.users);
 }
 EXPORT_SYMBOL_GPL(iommufd_device_detach);
+
+struct iommufd_access_priv {
+	struct iommufd_object obj;
+	struct iommufd_access pub;
+	struct iommufd_ctx *ictx;
+	struct iommufd_ioas *ioas;
+	const struct iommufd_access_ops *ops;
+	void *data;
+	u32 ioas_access_list_id;
+};
+
+void iommufd_access_destroy_object(struct iommufd_object *obj)
+{
+	struct iommufd_access_priv *access =
+		container_of(obj, struct iommufd_access_priv, obj);
+
+	WARN_ON(xa_erase(&access->ioas->access_list,
+			 access->ioas_access_list_id) != access);
+	iommufd_ctx_put(access->ictx);
+	refcount_dec(&access->ioas->obj.users);
+}
+
+/**
+ * iommufd_access_create - Create an iommufd_access
+ * @ictx: iommufd context the access belongs to
+ * @ioas_id: ID of the IOAS object the access will operate on
+ * @ops: Driver callbacks; ops->unmap() is invoked from
+ *       iommufd_access_notify_unmap()
+ * @data: Opaque pointer passed back as the first argument of @ops callbacks
+ *
+ * Returns the new access or an ERR_PTR on failure. The caller must release it
+ * with iommufd_access_destroy().
+ */
+struct iommufd_access *
+iommufd_access_create(struct iommufd_ctx *ictx, u32 ioas_id,
+		      const struct iommufd_access_ops *ops, void *data)
+{
+	struct iommufd_access_priv *access;
+	struct iommufd_object *obj;
+	int rc;
+
+	/*
+	 * FIXME: should this be an object? It is much like a device but I can't
+	 * foresee a use for it right now. On the other hand it costs almost
+	 * nothing to do, so may as well..
+	 */
+	access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS);
+	if (IS_ERR(access))
+		return ERR_CAST(access);
+
+	obj = iommufd_get_object(ictx, ioas_id, IOMMUFD_OBJ_IOAS);
+	if (IS_ERR(obj)) {
+		rc = PTR_ERR(obj);
+		goto out_abort;
+	}
+	access->ioas = container_of(obj, struct iommufd_ioas, obj);
+	iommufd_put_object_keep_user(obj);
+
+	rc = xa_alloc(&access->ioas->access_list, &access->ioas_access_list_id,
+		      access, xa_limit_16b, GFP_KERNEL_ACCOUNT);
+	if (rc)
+		goto out_put_ioas;
+
+	/* The calling driver is a user until iommufd_access_destroy() */
+	refcount_inc(&access->obj.users);
+	access->ictx = ictx;
+	access->ops = ops;
+	access->data = data;
+	access->pub.iopt = &access->ioas->iopt;
+	iommufd_ctx_get(ictx);
+	iommufd_object_finalize(ictx, &access->obj);
+	return &access->pub;
+out_put_ioas:
+	refcount_dec(&access->ioas->obj.users);
+out_abort:
+	iommufd_object_abort(ictx, &access->obj);
+	return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_GPL(iommufd_access_create);
+
+void iommufd_access_destroy(struct iommufd_access *access_pub)
+{
+	struct iommufd_access_priv *access =
+		container_of(access_pub, struct iommufd_access_priv, pub);
+	bool was_destroyed;
+
+	was_destroyed = iommufd_object_destroy_user(access->ictx, &access->obj);
+	WARN_ON(!was_destroyed);
+}
+EXPORT_SYMBOL_GPL(iommufd_access_destroy);
+
+/**
+ * iommufd_access_notify_unmap - Notify users of an iopt to stop using it
+ * @iopt: iopt to work on
+ * @iova: Starting iova in the iopt
+ * @length: Number of bytes
+ *
+ * After this function returns there should be no users attached to the pages
+ * linked to this iopt that intersect with iova,length. Anyone that has attached
+ * a user through iopt_access_pages() needs to detach it through
+ * iommufd_access_unpin_pages() before this function returns.
+ *
+ * The unmap callback may not call or wait for an iommufd_access_destroy() to
+ * complete. Once iommufd_access_destroy() returns no ops are running and no
+ * future ops will be called.
+ */
+void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
+				 unsigned long length)
+{
+	struct iommufd_ioas *ioas =
+		container_of(iopt, struct iommufd_ioas, iopt);
+	struct iommufd_access_priv *access;
+	unsigned long index;
+
+	xa_lock(&ioas->access_list);
+	xa_for_each(&ioas->access_list, index, access) {
+		if (!iommufd_lock_obj(&access->obj))
+			continue;
+		xa_unlock(&ioas->access_list);
+
+		access->ops->unmap(access->data, iova, length);
+
+		iommufd_put_object(&access->obj);
+		xa_lock(&ioas->access_list);
+	}
+	xa_unlock(&ioas->access_list);
+}
+
+int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
+		      void *data, size_t len, bool write)
+{
+	/* FIXME implement me */
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(iommufd_access_rw);
diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
index 7434bc8b393bbd..dfc7362b78c6fb 100644
--- a/drivers/iommu/iommufd/io_pagetable.c
+++ b/drivers/iommu/iommufd/io_pagetable.c
@@ -349,6 +349,7 @@ static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start,
 	 * is NULL. This prevents domain attach/detatch from running
 	 * concurrently with cleaning up the area.
 	 */
+again:
 	down_read(&iopt->domains_rwsem);
 	down_write(&iopt->iova_rwsem);
 	while ((area = iopt_area_iter_first(iopt, start, end))) {
@@ -377,8 +378,10 @@ static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start,
 			area->prevent_users = true;
 			up_write(&iopt->iova_rwsem);
 			up_read(&iopt->domains_rwsem);
-			/* Later patch calls back to drivers to unmap */
-			return -EBUSY;
+			iommufd_access_notify_unmap(iopt, area_first,
+						    iopt_area_length(area));
+			WARN_ON(READ_ONCE(area->num_users));
+			goto again;
 		}
 
 		pages = area->pages;
diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c
index 42b9a04188a116..7222af13551828 100644
--- a/drivers/iommu/iommufd/ioas.c
+++ b/drivers/iommu/iommufd/ioas.c
@@ -17,6 +17,7 @@ void iommufd_ioas_destroy(struct iommufd_object *obj)
 	rc = iopt_unmap_all(&ioas->iopt, NULL);
 	WARN_ON(rc && rc != -ENOENT);
 	iopt_destroy_table(&ioas->iopt);
+	WARN_ON(!xa_empty(&ioas->access_list));
 	mutex_destroy(&ioas->mutex);
 }
 
@@ -35,6 +36,7 @@ struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx)
 
 	INIT_LIST_HEAD(&ioas->hwpt_list);
 	mutex_init(&ioas->mutex);
+	xa_init_flags(&ioas->access_list, XA_FLAGS_ALLOC);
 	return ioas;
 
 out_abort:
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 0ede92b0aa32b4..540b36c0befa5e 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -52,6 +52,8 @@ int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
 		    unsigned long length, unsigned long *unmapped);
 int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped);
 
+void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
+				 unsigned long length);
 int iopt_table_add_domain(struct io_pagetable *iopt,
 			  struct iommu_domain *domain);
 void iopt_table_remove_domain(struct io_pagetable *iopt,
@@ -95,6 +97,7 @@ enum iommufd_object_type {
 	IOMMUFD_OBJ_DEVICE,
 	IOMMUFD_OBJ_HW_PAGETABLE,
 	IOMMUFD_OBJ_IOAS,
+	IOMMUFD_OBJ_ACCESS,
 };
 
 /* Base struct for all objects with a userspace ID handle. */
@@ -185,6 +188,7 @@ struct iommufd_ioas {
 	struct io_pagetable iopt;
 	struct mutex mutex;
 	struct list_head hwpt_list;
+	struct xarray access_list;
 };
 
 static inline struct iommufd_ioas *iommufd_get_ioas(struct iommufd_ucmd *ucmd,
@@ -231,4 +235,5 @@ void iommufd_hw_pagetable_destroy(struct iommufd_object *obj);
 
 void iommufd_device_destroy(struct iommufd_object *obj);
 
+void iommufd_access_destroy_object(struct iommufd_object *obj);
 #endif
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index b09dbfc8009dc2..ed64b84b3b9337 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -327,6 +327,9 @@ void iommufd_ctx_put(struct iommufd_ctx *ictx)
 EXPORT_SYMBOL_GPL(iommufd_ctx_put);
 
 static struct iommufd_object_ops iommufd_object_ops[] = {
+	[IOMMUFD_OBJ_ACCESS] = {
+		.destroy = iommufd_access_destroy_object,
+	},
 	[IOMMUFD_OBJ_DEVICE] = {
 		.destroy = iommufd_device_destroy,
 	},
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 477c3ea098f637..c072e400f3e645 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -13,10 +13,15 @@
 
 struct page;
 struct iommufd_device;
+struct iommufd_access;
 struct iommufd_ctx;
 struct io_pagetable;
 struct file;
 
+struct iommufd_access {
+	struct io_pagetable *iopt;
+};
+
 struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
 					   struct device *dev, u32 *id);
 void iommufd_device_unbind(struct iommufd_device *idev);
@@ -29,17 +34,46 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id,
 			  unsigned int flags);
 void iommufd_device_detach(struct iommufd_device *idev);
 
+struct iommufd_access_ops {
+	void (*unmap)(void *data, unsigned long iova, unsigned long length);
+};
+
+struct iommufd_access *
+iommufd_access_create(struct iommufd_ctx *ictx, u32 ioas_id,
+		      const struct iommufd_access_ops *ops, void *data);
+void iommufd_access_destroy(struct iommufd_access *access);
 int iopt_access_pages(struct io_pagetable *iopt, unsigned long iova,
 		      unsigned long length, struct page **out_pages,
 		      bool write);
 void iopt_unaccess_pages(struct io_pagetable *iopt, unsigned long iova,
 			 unsigned long length);
 
+static inline int iommufd_access_pin_pages(struct iommufd_access *access,
+					   unsigned long iova,
+					   unsigned long length,
+					   struct page **out_pages, bool write)
+{
+	if (!IS_ENABLED(CONFIG_IOMMUFD))
+		return -EOPNOTSUPP;
+	return iopt_access_pages(access->iopt, iova, length, out_pages, write);
+}
+
+static inline void iommufd_access_unpin_pages(struct iommufd_access *access,
+					      unsigned long iova,
+					      unsigned long length)
+{
+	if (IS_ENABLED(CONFIG_IOMMUFD))
+		iopt_unaccess_pages(access->iopt, iova, length);
+}
+
 void iommufd_ctx_get(struct iommufd_ctx *ictx);
 
 #if IS_ENABLED(CONFIG_IOMMUFD)
 struct iommufd_ctx *iommufd_ctx_from_file(struct file *file);
 void iommufd_ctx_put(struct iommufd_ctx *ictx);
+
+int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
+		      void *data, size_t len, bool write);
 #else /* !CONFIG_IOMMUFD */
 static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file)
 {
@@ -49,5 +83,11 @@ static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file)
 static inline void iommufd_ctx_put(struct iommufd_ctx *ictx)
 {
 }
+
+static inline int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
+				    void *data, size_t len, bool write)
+{
+	return -EOPNOTSUPP;
+}
 #endif /* CONFIG_IOMMUFD */
 #endif
-- 
2.37.3