Add some housekeeping code for IO page fault delivery. Add a fault field in the iommufd_hw_pagetable structure to store pending IO page faults and other related data. The fault field is allocated when an IOPF-capable user HWPT (indicated by IOMMU_HWPT_ALLOC_FLAGS_IOPF_CAPABLE being set in the allocation user data) is allocated. This field exists until the HWPT is destroyed. This also implies that it is possible to determine whether a HWPT is IOPF capable by checking the fault field. When an IOPF-capable HWPT is attached to a device (could also be a PASID of a device in the future), a fault cookie is allocated and set to the device. The cookie is cleared and freed when the HWPT is detached from the device. Signed-off-by: Yi Liu <yi.l.liu@xxxxxxxxx> Signed-off-by: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx> --- drivers/iommu/iommufd/iommufd_private.h | 12 +++++ drivers/iommu/iommufd/device.c | 61 +++++++++++++++++++++++-- drivers/iommu/iommufd/hw_pagetable.c | 55 ++++++++++++++++++++++ 3 files changed, 125 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index e951815f5707..5ff139acc5c0 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -236,6 +236,13 @@ int iommufd_option_rlimit_mode(struct iommu_option *cmd, int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd); +struct hw_pgtable_fault { + struct mutex mutex; + struct list_head deliver; + struct list_head response; + struct eventfd_ctx *trigger; +}; + /* * A HW pagetable is called an iommu_domain inside the kernel. This user object * allows directly creating and inspecting the domains. 
Domains that have kernel @@ -252,6 +259,7 @@ struct iommufd_hw_pagetable { bool msi_cookie : 1; /* Head at iommufd_ioas::hwpt_list */ struct list_head hwpt_item; + struct hw_pgtable_fault *fault; }; struct iommufd_hw_pagetable * @@ -314,6 +322,10 @@ struct iommufd_device { bool has_user_data; }; +struct iommufd_fault_cookie { + struct iommufd_device *idev; +}; + static inline struct iommufd_device * iommufd_get_device(struct iommufd_ucmd *ucmd, u32 id) { diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 29b212714e2c..3408f1fc3e9f 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -374,6 +374,44 @@ static int iommufd_group_setup_msi(struct iommufd_group *igroup, return 0; } +static int iommufd_device_set_fault_cookie(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev, + ioasid_t pasid) +{ + struct iommufd_fault_cookie *fcookie, *curr; + + if (!hwpt->fault) + return 0; + + fcookie = kzalloc(sizeof(*fcookie), GFP_KERNEL); + if (!fcookie) + return -ENOMEM; + fcookie->idev = idev; + + curr = iommu_set_device_fault_cookie(idev->dev, pasid, fcookie); + if (IS_ERR(curr)) { + kfree(fcookie); + return PTR_ERR(curr); + } + kfree(curr); + + return 0; +} + +static void iommufd_device_unset_fault_cookie(struct iommufd_hw_pagetable *hwpt, + struct iommufd_device *idev, + ioasid_t pasid) +{ + struct iommufd_fault_cookie *curr; + + if (!hwpt->fault) + return; + + curr = iommu_set_device_fault_cookie(idev->dev, pasid, NULL); + WARN_ON(IS_ERR(curr)); + kfree(curr); +} + int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, struct iommufd_device *idev) { @@ -398,6 +436,10 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, if (rc) goto err_unlock; + rc = iommufd_device_set_fault_cookie(hwpt, idev, 0); + if (rc) + goto err_unresv; + /* * Only attach to the group once for the first device that is in the * group. All the other devices will follow this attachment. 
The user @@ -408,17 +450,21 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, if (list_empty(&idev->igroup->device_list)) { rc = iommufd_group_setup_msi(idev->igroup, hwpt); if (rc) - goto err_unresv; + goto err_unset; rc = iommu_attach_group(hwpt->domain, idev->igroup->group); if (rc) - goto err_unresv; + goto err_unset; idev->igroup->hwpt = hwpt; } + refcount_inc(&hwpt->obj.users); list_add_tail(&idev->group_item, &idev->igroup->device_list); mutex_unlock(&idev->igroup->lock); return 0; + +err_unset: + iommufd_device_unset_fault_cookie(hwpt, idev, 0); err_unresv: iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev); err_unlock: @@ -433,6 +479,7 @@ iommufd_hw_pagetable_detach(struct iommufd_device *idev) mutex_lock(&idev->igroup->lock); list_del(&idev->group_item); + iommufd_device_unset_fault_cookie(hwpt, idev, 0); if (list_empty(&idev->igroup->device_list)) { iommu_detach_group(hwpt->domain, idev->igroup->group); idev->igroup->hwpt = NULL; @@ -502,9 +549,14 @@ iommufd_device_do_replace(struct iommufd_device *idev, if (rc) goto err_unresv; + iommufd_device_unset_fault_cookie(old_hwpt, idev, 0); + rc = iommufd_device_set_fault_cookie(hwpt, idev, 0); + if (rc) + goto err_unresv; + rc = iommu_group_replace_domain(igroup->group, hwpt->domain); if (rc) - goto err_unresv; + goto err_replace; if (hwpt->ioas != old_hwpt->ioas) { list_for_each_entry(cur, &igroup->device_list, group_item) @@ -526,6 +578,9 @@ iommufd_device_do_replace(struct iommufd_device *idev, /* Caller must destroy old_hwpt */ return old_hwpt; +err_replace: + iommufd_device_unset_fault_cookie(hwpt, idev, 0); + iommufd_device_set_fault_cookie(old_hwpt, idev, 0); err_unresv: list_for_each_entry(cur, &igroup->device_list, group_item) iopt_remove_reserved_iova(&hwpt->ioas->iopt, cur->dev); diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 47ec7ddd5f5d..d6d550c3d0cc 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ 
b/drivers/iommu/iommufd/hw_pagetable.c @@ -3,12 +3,16 @@ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */ #include <linux/iommu.h> +#include <linux/eventfd.h> #include <uapi/linux/iommufd.h> #include "../iommu-priv.h" #include "iommufd_private.h" #include "iommufd_test.h" +static struct hw_pgtable_fault *hw_pagetable_fault_alloc(int eventfd); +static void hw_pagetable_fault_free(struct hw_pgtable_fault *fault); + void iommufd_hw_pagetable_destroy(struct iommufd_object *obj) { struct iommufd_hw_pagetable *hwpt = @@ -27,6 +31,9 @@ void iommufd_hw_pagetable_destroy(struct iommufd_object *obj) if (hwpt->parent) refcount_dec(&hwpt->parent->obj.users); + + if (hwpt->fault) + hw_pagetable_fault_free(hwpt->fault); refcount_dec(&hwpt->ioas->obj.users); } @@ -255,6 +262,11 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) goto out_put_pt; } + if (!parent && (cmd->flags & IOMMU_HWPT_ALLOC_FLAGS_IOPF_CAPABLE)) { + rc = -EINVAL; + goto out_put_pt; + } + if (klen) { if (!cmd->data_len) { rc = -EINVAL; @@ -282,6 +294,14 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) goto out_unlock; } + if (cmd->flags & IOMMU_HWPT_ALLOC_FLAGS_IOPF_CAPABLE) { + hwpt->fault = hw_pagetable_fault_alloc(cmd->event_fd); + if (IS_ERR(hwpt->fault)) { + rc = PTR_ERR(hwpt->fault); + goto out_hwpt; + } + } + cmd->out_hwpt_id = hwpt->obj.id; rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); if (rc) @@ -346,3 +366,38 @@ int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd) iommufd_put_object(&hwpt->obj); return rc; } + +static struct hw_pgtable_fault *hw_pagetable_fault_alloc(int eventfd) +{ + struct hw_pgtable_fault *fault; + int rc; + + fault = kzalloc(sizeof(*fault), GFP_KERNEL); + if (!fault) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&fault->deliver); + INIT_LIST_HEAD(&fault->response); + mutex_init(&fault->mutex); + + fault->trigger = eventfd_ctx_fdget(eventfd); + if (IS_ERR(fault->trigger)) { + rc = PTR_ERR(fault->trigger); + goto out_free; + } + + return fault; + +out_free: + 
kfree(fault); + return ERR_PTR(rc); +} + +static void hw_pagetable_fault_free(struct hw_pgtable_fault *fault) +{ + WARN_ON(!list_empty(&fault->deliver)); + WARN_ON(!list_empty(&fault->response)); + + eventfd_ctx_put(fault->trigger); + kfree(fault); +} -- 2.34.1