Re: [PATCH v2 06/19] iommufd/viommu: Add IOMMU_VIOMMU_SET/UNSET_VDEV_ID ioctl

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 28/8/24 02:59, Nicolin Chen wrote:
Introduce a pair of new ioctls to set/unset a per-viommu virtual device id
that should be linked to a physical device id via an idev pointer.

Continue the support IOMMU_VIOMMU_TYPE_DEFAULT for a core-managed viommu.
Provide a lookup function for drivers to load device pointer by a virtual
device id.

Add a rw_semaphore protection around the vdev_id list. Any future ioctl
handlers that potentially access the list must grab the lock too.

Signed-off-by: Nicolin Chen <nicolinc@xxxxxxxxxx>
---
  drivers/iommu/iommufd/device.c          |  12 +++
  drivers/iommu/iommufd/iommufd_private.h |  21 ++++
  drivers/iommu/iommufd/main.c            |   6 ++
  drivers/iommu/iommufd/viommu.c          | 121 ++++++++++++++++++++++++
  include/uapi/linux/iommufd.h            |  40 ++++++++
  5 files changed, 200 insertions(+)

diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 5fd3dd420290..3ad759971b32 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -136,6 +136,18 @@ void iommufd_device_destroy(struct iommufd_object *obj)
  	struct iommufd_device *idev =
  		container_of(obj, struct iommufd_device, obj);
+ /* Unlocked since there should be no race in a destroy() */
+	if (idev->vdev_id) {
+		struct iommufd_vdev_id *vdev_id = idev->vdev_id;
+		struct iommufd_viommu *viommu = vdev_id->viommu;
+		struct iommufd_vdev_id *old;
+
+		old = xa_cmpxchg(&viommu->vdev_ids, vdev_id->id, vdev_id, NULL,
+				 GFP_KERNEL);
+		WARN_ON(old != vdev_id);
+		kfree(vdev_id);
+		idev->vdev_id = NULL;
+	}
  	iommu_device_release_dma_owner(idev->dev);
  	iommufd_put_group(idev->igroup);
  	if (!iommufd_selftest_is_mock_dev(idev->dev))
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 1f2a1c133b9a..2c6e168c5300 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -416,6 +416,7 @@ struct iommufd_device {
  	struct iommufd_object obj;
  	struct iommufd_ctx *ictx;
  	struct iommufd_group *igroup;
+	struct iommufd_vdev_id *vdev_id;
  	struct list_head group_item;
  	/* always the physical device */
  	struct device *dev;
@@ -533,11 +534,31 @@ struct iommufd_viommu {
  	struct iommufd_ctx *ictx;
  	struct iommufd_hwpt_paging *hwpt;
+ /* The locking order is vdev_ids_rwsem -> igroup::lock */
+	struct rw_semaphore vdev_ids_rwsem;
+	struct xarray vdev_ids;
+
  	unsigned int type;
  };
+struct iommufd_vdev_id {
+	struct iommufd_viommu *viommu;
+	struct iommufd_device *idev;
+	u64 id;
+};
+
+static inline struct iommufd_viommu *
+iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id)
+{
+	return container_of(iommufd_get_object(ucmd->ictx, id,
+					       IOMMUFD_OBJ_VIOMMU),
+			    struct iommufd_viommu, obj);
+}
+
  int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd);
  void iommufd_viommu_destroy(struct iommufd_object *obj);
+int iommufd_viommu_set_vdev_id(struct iommufd_ucmd *ucmd);
+int iommufd_viommu_unset_vdev_id(struct iommufd_ucmd *ucmd);
#ifdef CONFIG_IOMMUFD_TEST
  int iommufd_test(struct iommufd_ucmd *ucmd);
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 288ee51b6829..199ad90fa36b 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -334,6 +334,8 @@ union ucmd_buffer {
  	struct iommu_option option;
  	struct iommu_vfio_ioas vfio_ioas;
  	struct iommu_viommu_alloc viommu;
+	struct iommu_viommu_set_vdev_id set_vdev_id;
+	struct iommu_viommu_unset_vdev_id unset_vdev_id;
  #ifdef CONFIG_IOMMUFD_TEST
  	struct iommu_test_cmd test;
  #endif
@@ -387,6 +389,10 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
  		 __reserved),
  	IOCTL_OP(IOMMU_VIOMMU_ALLOC, iommufd_viommu_alloc_ioctl,
  		 struct iommu_viommu_alloc, out_viommu_id),
+	IOCTL_OP(IOMMU_VIOMMU_SET_VDEV_ID, iommufd_viommu_set_vdev_id,
+		 struct iommu_viommu_set_vdev_id, vdev_id),
+	IOCTL_OP(IOMMU_VIOMMU_UNSET_VDEV_ID, iommufd_viommu_unset_vdev_id,
+		 struct iommu_viommu_unset_vdev_id, vdev_id),
  #ifdef CONFIG_IOMMUFD_TEST
  	IOCTL_OP(IOMMU_TEST_CMD, iommufd_test, struct iommu_test_cmd, last),
  #endif
diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c
index 200653a4bf57..8ffcd72b16b8 100644
--- a/drivers/iommu/iommufd/viommu.c
+++ b/drivers/iommu/iommufd/viommu.c
@@ -8,6 +8,15 @@ void iommufd_viommu_destroy(struct iommufd_object *obj)
  {
  	struct iommufd_viommu *viommu =
  		container_of(obj, struct iommufd_viommu, obj);
+	struct iommufd_vdev_id *vdev_id;
+	unsigned long index;
+
+	xa_for_each(&viommu->vdev_ids, index, vdev_id) {
+		/* Unlocked since there should be no race in a destroy() */
+		vdev_id->idev->vdev_id = NULL;
+		kfree(vdev_id);
+	}
+	xa_destroy(&viommu->vdev_ids);
refcount_dec(&viommu->hwpt->common.obj.users);
  }
@@ -53,6 +62,9 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
  	viommu->ictx = ucmd->ictx;
  	viommu->hwpt = hwpt_paging;
+ xa_init(&viommu->vdev_ids);
+	init_rwsem(&viommu->vdev_ids_rwsem);
+
  	refcount_inc(&viommu->hwpt->common.obj.users);
cmd->out_viommu_id = viommu->obj.id;
@@ -70,3 +82,112 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
  	iommufd_put_object(ucmd->ictx, &idev->obj);
  	return rc;
  }
+
+int iommufd_viommu_set_vdev_id(struct iommufd_ucmd *ucmd)
+{
+	struct iommu_viommu_set_vdev_id *cmd = ucmd->cmd;
+	struct iommufd_vdev_id *vdev_id, *curr;
+	struct iommufd_viommu *viommu;
+	struct iommufd_device *idev;
+	int rc = 0;
+
+	if (cmd->vdev_id > ULONG_MAX)
+		return -EINVAL;
+
+	viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
+	if (IS_ERR(viommu))
+		return PTR_ERR(viommu);
+
+	idev = iommufd_get_device(ucmd, cmd->dev_id);
+	if (IS_ERR(idev)) {
+		rc = PTR_ERR(idev);
+		goto out_put_viommu;
+	}
+
+	down_write(&viommu->vdev_ids_rwsem);
+	mutex_lock(&idev->igroup->lock);
+	if (idev->vdev_id) {
+		rc = -EEXIST;
+		goto out_unlock_igroup;
+	}
+
+	vdev_id = kzalloc(sizeof(*vdev_id), GFP_KERNEL);
+	if (!vdev_id) {
+		rc = -ENOMEM;
+		goto out_unlock_igroup;
+	}
+
+	vdev_id->idev = idev;
+	vdev_id->viommu = viommu;
+	vdev_id->id = cmd->vdev_id;
+
+	curr = xa_cmpxchg(&viommu->vdev_ids, cmd->vdev_id, NULL, vdev_id,
+			  GFP_KERNEL);
+	if (curr) {
+		rc = xa_err(curr) ? : -EBUSY;
+		goto out_free;
+	}
+
+	idev->vdev_id = vdev_id;
+	goto out_unlock_igroup;
+
+out_free:
+	kfree(vdev_id);
+out_unlock_igroup:
+	mutex_unlock(&idev->igroup->lock);
+	up_write(&viommu->vdev_ids_rwsem);
+	iommufd_put_object(ucmd->ictx, &idev->obj);
+out_put_viommu:
+	iommufd_put_object(ucmd->ictx, &viommu->obj);
+	return rc;
+}
+
+int iommufd_viommu_unset_vdev_id(struct iommufd_ucmd *ucmd)
+{
+	struct iommu_viommu_unset_vdev_id *cmd = ucmd->cmd;
+	struct iommufd_viommu *viommu;
+	struct iommufd_vdev_id *old;
+	struct iommufd_device *idev;
+	int rc = 0;
+
+	if (cmd->vdev_id > ULONG_MAX)
+		return -EINVAL;
+
+	viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
+	if (IS_ERR(viommu))
+		return PTR_ERR(viommu);
+
+	idev = iommufd_get_device(ucmd, cmd->dev_id);
+	if (IS_ERR(idev)) {
+		rc = PTR_ERR(idev);
+		goto out_put_viommu;
+	}
+
+	down_write(&viommu->vdev_ids_rwsem);
+	mutex_lock(&idev->igroup->lock);
+	if (!idev->vdev_id) {
+		rc = -ENOENT;
+		goto out_unlock_igroup;
+	}
+	if (idev->vdev_id->id != cmd->vdev_id) {
+		rc = -EINVAL;
+		goto out_unlock_igroup;
+	}
+
+	old = xa_cmpxchg(&viommu->vdev_ids, idev->vdev_id->id,
+			 idev->vdev_id, NULL, GFP_KERNEL);
+	if (xa_is_err(old)) {
+		rc = xa_err(old);
+		goto out_unlock_igroup;
+	}
+	kfree(old);
+	idev->vdev_id = NULL;
+
+out_unlock_igroup:
+	mutex_unlock(&idev->igroup->lock);
+	up_write(&viommu->vdev_ids_rwsem);
+	iommufd_put_object(ucmd->ictx, &idev->obj);
+out_put_viommu:
+	iommufd_put_object(ucmd->ictx, &viommu->obj);
+	return rc;
+}
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 51ce6a019c34..1816e89c922d 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -52,6 +52,8 @@ enum {
  	IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d,
  	IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e,
  	IOMMUFD_CMD_VIOMMU_ALLOC = 0x8f,
+	IOMMUFD_CMD_VIOMMU_SET_VDEV_ID = 0x90,
+	IOMMUFD_CMD_VIOMMU_UNSET_VDEV_ID = 0x91,
  };
/**
@@ -882,4 +884,42 @@ struct iommu_viommu_alloc {
  	__u32 out_viommu_id;
  };
  #define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC)
+
+/**
+ * struct iommu_viommu_set_vdev_id - ioctl(IOMMU_VIOMMU_SET_VDEV_ID)
+ * @size: sizeof(struct iommu_viommu_set_vdev_id)
+ * @viommu_id: viommu ID to associate with the device to store its virtual ID
+ * @dev_id: device ID to set its virtual ID
+ * @__reserved: Must be 0
+ * @vdev_id: Virtual device ID
+ *
+ * Set a viommu-specific virtual ID of a device
+ */
+struct iommu_viommu_set_vdev_id {
+	__u32 size;
+	__u32 viommu_id;
+	__u32 dev_id;

Is this ID from vfio_device_bind_iommufd.out_devid?

+	__u32 __reserved;
+	__aligned_u64 vdev_id;

What is the nature of this id? It is not the guest's BDFn, is it? The code suggests it is ARM's "SID" == "stream ID" and "a device might be able to generate multiple StreamIDs" (how, why?) 🤯 And these streams seem to have nothing to do with PCIe IDE streams, right?

For my SEV-TIO exercise ("trusted IO"), I am looking for a kernel interface to pass the guest's BDFs for a specific host device (which is passed through) and nothing in the kernel has any knowledge of it atm, is this the right place, or another ioctl() is needed here?

Sorry, I am too ignorant about ARM :)


+};
+#define IOMMU_VIOMMU_SET_VDEV_ID _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_SET_VDEV_ID)
+
+/**
+ * struct iommu_viommu_unset_vdev_id - ioctl(IOMMU_VIOMMU_UNSET_VDEV_ID)
+ * @size: sizeof(struct iommu_viommu_unset_vdev_id)
+ * @viommu_id: viommu ID associated with the device to delete its virtual ID
+ * @dev_id: device ID to unset its virtual ID
+ * @__reserved: Must be 0
+ * @vdev_id: Virtual device ID (for verification)
+ *
+ * Unset a viommu-specific virtual ID of a device
+ */
+struct iommu_viommu_unset_vdev_id {
+	__u32 size;
+	__u32 viommu_id;
+	__u32 dev_id;
+	__u32 __reserved;
+	__aligned_u64 vdev_id;
+};
+#define IOMMU_VIOMMU_UNSET_VDEV_ID _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_UNSET_VDEV_ID)
  #endif

Nit: "git format-patch -O orderfile" makes patches nicer by putting the documentation first (.h before .c, in this case) with the "ordefile" looking like this:

===
*.txt
configure
*Makefile*
*.json
*.h
*.c
===

Thanks,

--
Alexey





[Index of Archives]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Share Photos]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Samba]     [Device Mapper]

  Powered by Linux