Hi Nicolin, On Tue, Aug 27, 2024 at 09:59:43AM -0700, Nicolin Chen wrote: > Introduce a pair of new ioctls to set/unset a per-viommu virtual device id > that should be linked to a physical device id via an idev pointer. > > Continue the support IOMMU_VIOMMU_TYPE_DEFAULT for a core-managed viommu. > Provide a lookup function for drivers to load device pointer by a virtual > device id. > > Add a rw_semaphore protection around the vdev_id list. Any future ioctl > handlers that potentially access the list must grab the lock too. > > Signed-off-by: Nicolin Chen <nicolinc@xxxxxxxxxx> > --- > drivers/iommu/iommufd/device.c | 12 +++ > drivers/iommu/iommufd/iommufd_private.h | 21 ++++ > drivers/iommu/iommufd/main.c | 6 ++ > drivers/iommu/iommufd/viommu.c | 121 ++++++++++++++++++++++++ > include/uapi/linux/iommufd.h | 40 ++++++++ > 5 files changed, 200 insertions(+) > > diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c > index 5fd3dd420290..3ad759971b32 100644 > --- a/drivers/iommu/iommufd/device.c > +++ b/drivers/iommu/iommufd/device.c > @@ -136,6 +136,18 @@ void iommufd_device_destroy(struct iommufd_object *obj) > struct iommufd_device *idev = > container_of(obj, struct iommufd_device, obj); > > + /* Unlocked since there should be no race in a destroy() */ > + if (idev->vdev_id) { > + struct iommufd_vdev_id *vdev_id = idev->vdev_id; > + struct iommufd_viommu *viommu = vdev_id->viommu; > + struct iommufd_vdev_id *old; > + > + old = xa_cmpxchg(&viommu->vdev_ids, vdev_id->id, vdev_id, NULL, > + GFP_KERNEL); > + WARN_ON(old != vdev_id); > + kfree(vdev_id); > + idev->vdev_id = NULL; > + } > iommu_device_release_dma_owner(idev->dev); > iommufd_put_group(idev->igroup); > if (!iommufd_selftest_is_mock_dev(idev->dev)) > diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h > index 1f2a1c133b9a..2c6e168c5300 100644 > --- a/drivers/iommu/iommufd/iommufd_private.h > +++ b/drivers/iommu/iommufd/iommufd_private.h > @@ 
-416,6 +416,7 @@ struct iommufd_device { > struct iommufd_object obj; > struct iommufd_ctx *ictx; > struct iommufd_group *igroup; > + struct iommufd_vdev_id *vdev_id; > struct list_head group_item; > /* always the physical device */ > struct device *dev; > @@ -533,11 +534,31 @@ struct iommufd_viommu { > struct iommufd_ctx *ictx; > struct iommufd_hwpt_paging *hwpt; > > + /* The locking order is vdev_ids_rwsem -> igroup::lock */ > + struct rw_semaphore vdev_ids_rwsem; > + struct xarray vdev_ids; > + > unsigned int type; > }; > > +struct iommufd_vdev_id { > + struct iommufd_viommu *viommu; > + struct iommufd_device *idev; > + u64 id; > +}; > + > +static inline struct iommufd_viommu * > +iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id) > +{ > + return container_of(iommufd_get_object(ucmd->ictx, id, > + IOMMUFD_OBJ_VIOMMU), > + struct iommufd_viommu, obj); > +} > + > int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd); > void iommufd_viommu_destroy(struct iommufd_object *obj); > +int iommufd_viommu_set_vdev_id(struct iommufd_ucmd *ucmd); > +int iommufd_viommu_unset_vdev_id(struct iommufd_ucmd *ucmd); > > #ifdef CONFIG_IOMMUFD_TEST > int iommufd_test(struct iommufd_ucmd *ucmd); > diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c > index 288ee51b6829..199ad90fa36b 100644 > --- a/drivers/iommu/iommufd/main.c > +++ b/drivers/iommu/iommufd/main.c > @@ -334,6 +334,8 @@ union ucmd_buffer { > struct iommu_option option; > struct iommu_vfio_ioas vfio_ioas; > struct iommu_viommu_alloc viommu; > + struct iommu_viommu_set_vdev_id set_vdev_id; > + struct iommu_viommu_unset_vdev_id unset_vdev_id; > #ifdef CONFIG_IOMMUFD_TEST > struct iommu_test_cmd test; > #endif > @@ -387,6 +389,10 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { > __reserved), > IOCTL_OP(IOMMU_VIOMMU_ALLOC, iommufd_viommu_alloc_ioctl, > struct iommu_viommu_alloc, out_viommu_id), > + IOCTL_OP(IOMMU_VIOMMU_SET_VDEV_ID, iommufd_viommu_set_vdev_id, > + struct 
iommu_viommu_set_vdev_id, vdev_id), > + IOCTL_OP(IOMMU_VIOMMU_UNSET_VDEV_ID, iommufd_viommu_unset_vdev_id, > + struct iommu_viommu_unset_vdev_id, vdev_id), > #ifdef CONFIG_IOMMUFD_TEST > IOCTL_OP(IOMMU_TEST_CMD, iommufd_test, struct iommu_test_cmd, last), > #endif > diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c > index 200653a4bf57..8ffcd72b16b8 100644 > --- a/drivers/iommu/iommufd/viommu.c > +++ b/drivers/iommu/iommufd/viommu.c > @@ -8,6 +8,15 @@ void iommufd_viommu_destroy(struct iommufd_object *obj) > { > struct iommufd_viommu *viommu = > container_of(obj, struct iommufd_viommu, obj); > + struct iommufd_vdev_id *vdev_id; > + unsigned long index; > + > + xa_for_each(&viommu->vdev_ids, index, vdev_id) { > + /* Unlocked since there should be no race in a destroy() */ > + vdev_id->idev->vdev_id = NULL; > + kfree(vdev_id); > + } > + xa_destroy(&viommu->vdev_ids); > > refcount_dec(&viommu->hwpt->common.obj.users); > } > @@ -53,6 +62,9 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd) > viommu->ictx = ucmd->ictx; > viommu->hwpt = hwpt_paging; > > + xa_init(&viommu->vdev_ids); > + init_rwsem(&viommu->vdev_ids_rwsem); > + > refcount_inc(&viommu->hwpt->common.obj.users); > > cmd->out_viommu_id = viommu->obj.id; > @@ -70,3 +82,112 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd) > iommufd_put_object(ucmd->ictx, &idev->obj); > return rc; > } > + > +int iommufd_viommu_set_vdev_id(struct iommufd_ucmd *ucmd) > +{ > + struct iommu_viommu_set_vdev_id *cmd = ucmd->cmd; > + struct iommufd_vdev_id *vdev_id, *curr; > + struct iommufd_viommu *viommu; > + struct iommufd_device *idev; > + int rc = 0; > + > + if (cmd->vdev_id > ULONG_MAX) > + return -EINVAL; > + > + viommu = iommufd_get_viommu(ucmd, cmd->viommu_id); > + if (IS_ERR(viommu)) > + return PTR_ERR(viommu); > + > + idev = iommufd_get_device(ucmd, cmd->dev_id); > + if (IS_ERR(idev)) { > + rc = PTR_ERR(idev); > + goto out_put_viommu; > + } > + > + 
down_write(&viommu->vdev_ids_rwsem); > + mutex_lock(&idev->igroup->lock); > + if (idev->vdev_id) { > + rc = -EEXIST; > + goto out_unlock_igroup; > + } > + > + vdev_id = kzalloc(sizeof(*vdev_id), GFP_KERNEL); > + if (!vdev_id) { > + rc = -ENOMEM; > + goto out_unlock_igroup; > + } > + > + vdev_id->idev = idev; > + vdev_id->viommu = viommu; > + vdev_id->id = cmd->vdev_id; My understanding of IOMMUFD is very limited, but AFAICT, this means it’s assumed that each device can only have one stream ID (RID)? As far as I can see in patch 17, arm_smmu_convert_viommu_vdev_id() converts the virtual ID to a physical one using master->streams[0].id. Is that correct or am I missing something? I ask because I am looking at a similar problem for a paravirtual IOMMU with pKVM, where the UAPI would be something similar to: GET_NUM_END_POINTS(dev) => nr_sids SET_END_POINT_VSID(dev, sid_index, vsid) This is similar to what VFIO does with IRQs, since a device can have many SIDs. Thanks, Mostafa > + > + curr = xa_cmpxchg(&viommu->vdev_ids, cmd->vdev_id, NULL, vdev_id, > + GFP_KERNEL); > + if (curr) { > + rc = xa_err(curr) ? 
: -EBUSY; > + goto out_free; > + } > + > + idev->vdev_id = vdev_id; > + goto out_unlock_igroup; > + > +out_free: > + kfree(vdev_id); > +out_unlock_igroup: > + mutex_unlock(&idev->igroup->lock); > + up_write(&viommu->vdev_ids_rwsem); > + iommufd_put_object(ucmd->ictx, &idev->obj); > +out_put_viommu: > + iommufd_put_object(ucmd->ictx, &viommu->obj); > + return rc; > +} > + > +int iommufd_viommu_unset_vdev_id(struct iommufd_ucmd *ucmd) > +{ > + struct iommu_viommu_unset_vdev_id *cmd = ucmd->cmd; > + struct iommufd_viommu *viommu; > + struct iommufd_vdev_id *old; > + struct iommufd_device *idev; > + int rc = 0; > + > + if (cmd->vdev_id > ULONG_MAX) > + return -EINVAL; > + > + viommu = iommufd_get_viommu(ucmd, cmd->viommu_id); > + if (IS_ERR(viommu)) > + return PTR_ERR(viommu); > + > + idev = iommufd_get_device(ucmd, cmd->dev_id); > + if (IS_ERR(idev)) { > + rc = PTR_ERR(idev); > + goto out_put_viommu; > + } > + > + down_write(&viommu->vdev_ids_rwsem); > + mutex_lock(&idev->igroup->lock); > + if (!idev->vdev_id) { > + rc = -ENOENT; > + goto out_unlock_igroup; > + } > + if (idev->vdev_id->id != cmd->vdev_id) { > + rc = -EINVAL; > + goto out_unlock_igroup; > + } > + > + old = xa_cmpxchg(&viommu->vdev_ids, idev->vdev_id->id, > + idev->vdev_id, NULL, GFP_KERNEL); > + if (xa_is_err(old)) { > + rc = xa_err(old); > + goto out_unlock_igroup; > + } > + kfree(old); > + idev->vdev_id = NULL; > + > +out_unlock_igroup: > + mutex_unlock(&idev->igroup->lock); > + up_write(&viommu->vdev_ids_rwsem); > + iommufd_put_object(ucmd->ictx, &idev->obj); > +out_put_viommu: > + iommufd_put_object(ucmd->ictx, &viommu->obj); > + return rc; > +} > diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h > index 51ce6a019c34..1816e89c922d 100644 > --- a/include/uapi/linux/iommufd.h > +++ b/include/uapi/linux/iommufd.h > @@ -52,6 +52,8 @@ enum { > IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d, > IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e, > IOMMUFD_CMD_VIOMMU_ALLOC = 0x8f, > + 
IOMMUFD_CMD_VIOMMU_SET_VDEV_ID = 0x90, > + IOMMUFD_CMD_VIOMMU_UNSET_VDEV_ID = 0x91, > }; > > /** > @@ -882,4 +884,42 @@ struct iommu_viommu_alloc { > __u32 out_viommu_id; > }; > #define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC) > + > +/** > + * struct iommu_viommu_set_vdev_id - ioctl(IOMMU_VIOMMU_SET_VDEV_ID) > + * @size: sizeof(struct iommu_viommu_set_vdev_id) > + * @viommu_id: viommu ID to associate with the device to store its virtual ID > + * @dev_id: device ID to set its virtual ID > + * @__reserved: Must be 0 > + * @vdev_id: Virtual device ID > + * > + * Set a viommu-specific virtual ID of a device > + */ > +struct iommu_viommu_set_vdev_id { > + __u32 size; > + __u32 viommu_id; > + __u32 dev_id; > + __u32 __reserved; > + __aligned_u64 vdev_id; > +}; > +#define IOMMU_VIOMMU_SET_VDEV_ID _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_SET_VDEV_ID) > + > +/** > + * struct iommu_viommu_unset_vdev_id - ioctl(IOMMU_VIOMMU_UNSET_VDEV_ID) > + * @size: sizeof(struct iommu_viommu_unset_vdev_id) > + * @viommu_id: viommu ID associated with the device to delete its virtual ID > + * @dev_id: device ID to unset its virtual ID > + * @__reserved: Must be 0 > + * @vdev_id: Virtual device ID (for verification) > + * > + * Unset a viommu-specific virtual ID of a device > + */ > +struct iommu_viommu_unset_vdev_id { > + __u32 size; > + __u32 viommu_id; > + __u32 dev_id; > + __u32 __reserved; > + __aligned_u64 vdev_id; > +}; > +#define IOMMU_VIOMMU_UNSET_VDEV_ID _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_UNSET_VDEV_ID) > #endif > -- > 2.43.0 >