Re: [RFC PATCH] vfio/pci: add PCIe TPH to device feature ioctl

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, 2025-02-21 at 22:46 +0000, Wathsala Vithanage wrote:
> Linux v6.13 introduced the PCIe TLP Processing Hints (TPH) feature
> for
> direct cache injection. As described in the relevant patch set [1],
> direct cache injection in supported hardware allows optimal platform
> resource utilization for specific requests on the PCIe bus. This
> feature
> is currently available only for kernel device drivers. However,
> user space applications, especially those whose performance is
> sensitive
> to the latency of inbound writes as seen by a CPU core, may benefit
> from
> using this information (E.g., DPDK cache stashing RFC [2] or an HPC
> application running in a VM).
> 
> This patch enables configuring of TPH from the user space via
> VFIO_DEVICE_FEATURE IOCLT. It provides an interface to user space
> drivers and VMMs to enable/disable the TPH feature on PCIe devices
> and
> set steering tags in MSI-X or steering-tag table entries using
> VFIO_DEVICE_FEATURE_SET flag or read steering tags from the kernel
> using
> VFIO_DEVICE_FEATURE_GET to operate in device-specific mode.
> 
> [1] 
> lore.kernel.org/linux-pci/20241002165954.128085-1-wei.huang2@xxxxxxx
> [2] 
> inbox.dpdk.org/dev/20241021015246.304431-2-wathsala.vithanage@xxxxxxx
> 
> Signed-off-by: Wathsala Vithanage <wathsala.vithanage@xxxxxxx>
> ---
>  drivers/vfio/pci/vfio_pci_core.c | 163
> +++++++++++++++++++++++++++++++
>  include/uapi/linux/vfio.h        |  68 +++++++++++++
>  2 files changed, 231 insertions(+)
> 
> diff --git a/drivers/vfio/pci/vfio_pci_core.c
> b/drivers/vfio/pci/vfio_pci_core.c
> index 586e49efb81b..d6dd0495b08b 100644
> --- a/drivers/vfio/pci/vfio_pci_core.c
> +++ b/drivers/vfio/pci/vfio_pci_core.c
> @@ -29,6 +29,7 @@
>  #include <linux/nospec.h>
>  #include <linux/sched/mm.h>
>  #include <linux/iommufd.h>
> +#include <linux/pci-tph.h>
>  #if IS_ENABLED(CONFIG_EEH)
>  #include <asm/eeh.h>
>  #endif
> @@ -1510,6 +1511,165 @@ static int vfio_pci_core_feature_token(struct
> vfio_device *device, u32 flags,
>  	return 0;
>  }
>  
> +static ssize_t vfio_pci_tph_uinfo_dup(struct vfio_pci_tph *tph,
> +				      void __user *arg, size_t
> argsz,
> +				      struct vfio_pci_tph_info
> **info)
> +{
> +	size_t minsz;
> +
> +	if (tph->count > VFIO_TPH_INFO_MAX)
> +		return -EINVAL;
> +	if (!tph->count)
> +		return 0;
> +
> +	minsz = tph->count * sizeof(struct vfio_pci_tph_info);
> +	if (minsz < argsz)
> +		return -EINVAL;
> +
> +	*info = memdup_user(arg, minsz);

You can use memdup_array_user() instead of the lines above. It does the
multiplication plus overflow check for you and will make your code more
compact.

> +	if (IS_ERR(info))
> +		return PTR_ERR(info);
> +
> +	return minsz;

see below…

> +}
> +
> +static int vfio_pci_feature_tph_st_op(struct vfio_pci_core_device
> *vdev,
> +				      struct vfio_pci_tph *tph,
> +				      void __user *arg, size_t
> argsz)
> +{
> +	int i, mtype, err = 0;
> +	u32 cpu_uid;
> +	struct vfio_pci_tph_info *info = NULL;
> +	ssize_t data_size = vfio_pci_tph_uinfo_dup(tph, arg, argsz,
> &info);
> +
> +	if (data_size <= 0)
> +		return data_size;

So it seems you return here in case of an error. However, that would
result in a length of 0 being an error?

I would try to avoid to return 0 for an error whenever possible. That
breaks convention.

How about you return the result value of memdup_array_user() in
…uinfo_dup()?

The only thing I can't tell is whether tph->count == 0 should be
treated as an error. Maybe map it to -EINVAL?


Regards,
P.

> +
> +	for (i = 0; i < tph->count; i++) {
> +		if (!(info[i].cpu_id < nr_cpu_ids &&
> cpu_present(info[i].cpu_id))) {
> +			info[i].err = -EINVAL;
> +			continue;
> +		}
> +		cpu_uid = topology_core_id(info[i].cpu_id);
> +		mtype = (info[i].flags & VFIO_TPH_MEM_TYPE_MASK) >>
> +			VFIO_TPH_MEM_TYPE_SHIFT;
> +
> +		/* processing hints are always ignored */
> +		info[i].ph_ignore = 1;
> +
> +		info[i].err = pcie_tph_get_cpu_st(vdev->pdev, mtype,
> cpu_uid,
> +						  &info[i].st);
> +		if (info[i].err)
> +			continue;
> +
> +		if (tph->flags & VFIO_DEVICE_FEATURE_TPH_SET_ST) {
> +			info[i].err = pcie_tph_set_st_entry(vdev-
> >pdev,
> +							   
> info[i].index,
> +							   
> info[i].st);
> +		}
> +	}
> +
> +	if (copy_to_user(arg, info, data_size))
> +		err = -EFAULT;
> +
> +	kfree(info);
> +	return err;
> +}
> +
> +
> +static int vfio_pci_feature_tph_enable(struct vfio_pci_core_device
> *vdev,
> +				       struct vfio_pci_tph *arg)
> +{
> +	int mode = arg->flags & VFIO_TPH_ST_MODE_MASK;
> +
> +	switch (mode) {
> +	case VFIO_TPH_ST_NS_MODE:
> +		return pcie_enable_tph(vdev->pdev,
> PCI_TPH_ST_NS_MODE);
> +
> +	case VFIO_TPH_ST_IV_MODE:
> +		return pcie_enable_tph(vdev->pdev,
> PCI_TPH_ST_IV_MODE);
> +
> +	case VFIO_TPH_ST_DS_MODE:
> +		return pcie_enable_tph(vdev->pdev,
> PCI_TPH_ST_DS_MODE);
> +
> +	default:
> +		return -EINVAL;
> +	}
> +
> +}
> +
> +static int vfio_pci_feature_tph_disable(struct vfio_pci_core_device
> *vdev)
> +{
> +	pcie_disable_tph(vdev->pdev);
> +	return 0;
> +}
> +
> +static int vfio_pci_feature_tph_prepare(struct vfio_pci_tph __user
> *arg,
> +					size_t argsz, u32 flags,
> +					struct vfio_pci_tph *tph)
> +{
> +	u32 op;
> +	int err = vfio_check_feature(flags, argsz,
> +				 VFIO_DEVICE_FEATURE_SET |
> +				 VFIO_DEVICE_FEATURE_GET,
> +				 sizeof(struct vfio_pci_tph));
> +	if (err != 1)
> +		return err;
> +
> +	if (copy_from_user(tph, arg, sizeof(struct vfio_pci_tph)))
> +		return -EFAULT;
> +
> +	op = tph->flags & VFIO_DEVICE_FEATURE_TPH_OP_MASK;
> +
> +	switch (op) {
> +	case VFIO_DEVICE_FEATURE_TPH_ENABLE:
> +	case VFIO_DEVICE_FEATURE_TPH_DISABLE:
> +	case VFIO_DEVICE_FEATURE_TPH_SET_ST:
> +		return (flags & VFIO_DEVICE_FEATURE_SET) ? 0 : -
> EINVAL;
> +
> +	case VFIO_DEVICE_FEATURE_TPH_GET_ST:
> +		return (flags & VFIO_DEVICE_FEATURE_GET) ? 0 : -
> EINVAL;
> +
> +	default:
> +		return -EINVAL;
> +	}
> +}
> +
> +static int vfio_pci_core_feature_tph(struct vfio_device *device, u32
> flags,
> +				     struct vfio_pci_tph __user
> *arg,
> +				     size_t argsz)
> +{
> +	u32 op;
> +	struct vfio_pci_tph tph;
> +	void __user *uinfo;
> +	size_t infosz;
> +	struct vfio_pci_core_device *vdev =
> +		container_of(device, struct vfio_pci_core_device,
> vdev);
> +	int err = vfio_pci_feature_tph_prepare(arg, argsz, flags,
> &tph);
> +
> +	if (err)
> +		return err;
> +
> +	op = tph.flags & VFIO_DEVICE_FEATURE_TPH_OP_MASK;
> +
> +	switch (op) {
> +	case VFIO_DEVICE_FEATURE_TPH_ENABLE:
> +		return vfio_pci_feature_tph_enable(vdev, &tph);
> +
> +	case VFIO_DEVICE_FEATURE_TPH_DISABLE:
> +		return vfio_pci_feature_tph_disable(vdev);
> +
> +	case VFIO_DEVICE_FEATURE_TPH_GET_ST:
> +	case VFIO_DEVICE_FEATURE_TPH_SET_ST:
> +		uinfo = (u8 *)(arg) + offsetof(struct vfio_pci_tph,
> info);
> +		infosz = argsz - sizeof(struct vfio_pci_tph);
> +		return vfio_pci_feature_tph_st_op(vdev, &tph, uinfo,
> infosz);
> +
> +	default:
> +		return -EINVAL;
> +	}
> +}
> +
>  int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32
> flags,
>  				void __user *arg, size_t argsz)
>  {
> @@ -1523,6 +1683,9 @@ int vfio_pci_core_ioctl_feature(struct
> vfio_device *device, u32 flags,
>  		return vfio_pci_core_pm_exit(device, flags, arg,
> argsz);
>  	case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN:
>  		return vfio_pci_core_feature_token(device, flags,
> arg, argsz);
> +	case VFIO_DEVICE_FEATURE_PCI_TPH:
> +		return vfio_pci_core_feature_tph(device, flags,
> +						 arg, argsz);
>  	default:
>  		return -ENOTTY;
>  	}
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index c8dbf8219c4f..608d57dfe279 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -1458,6 +1458,74 @@ struct vfio_device_feature_bus_master {
>  };
>  #define VFIO_DEVICE_FEATURE_BUS_MASTER 10
>  
> +/*
> + * Upon VFIO_DEVICE_FEATURE_SET, enable or disable PCIe TPH or set
> steering tags
> + * on the device. Data provided when setting this feature is a __u32
> with the
> + * following flags. VFIO_DEVICE_FEATURE_TPH_ENABLE enables PCIe TPH
> in
> + * no-steering-tag, interrupt-vector, or device-specific mode when
> feature flags
> + * VFIO_TPH_ST_NS_MODE, VFIO_TPH_ST_IV_MODE, and VFIO_TPH_ST_DS_MODE
> are set
> + * respectively.
> + * VFIO_DEVICE_FEATURE_TPH_DISABLE disables PCIe TPH on the device.
> + * VFIO_DEVICE_FEATURE_TPH_SET_ST set steering tags on a device at
> an index in
> + * MSI-X or ST-table depending on the VFIO_TPH_ST_x_MODE flag used
> and device
> + * capabilities. The caller can set multiple steering tags by
> passing an array
> + * of vfio_pci_tph_info objects containing cpu_id, cache_level, and
> + * MSI-X/ST-table index. The caller can also set the intended memory
> type and
> + * the processing hint by setting VFIO_TPH_MEM_TYPE_x and
> VFIO_TPH_HINT_x flags,
> + * respectively. The return value for each vfio_pci_tph_info object
> is stored in
> + * err, with the steering-tag set on the device and the ph_ignore
> status bit
> + * resulting from the steering-tag lookup operation. If err < 0, the
> values
> + * stored in the st and ph_ignore fields should be considered
> invalid.
> + *
> + * Upon VFIO_DEVICE_FEATURE_GET,  return steering tags to the
> caller.
> + * VFIO_DEVICE_FEATURE_TPH_GET_ST returns steering tags to the
> caller.
> + * The return values per vfio_pci_tph_info object are stored in the
> st,
> + * ph_ignore, and err fields.
> + */
> +struct vfio_pci_tph_info {
> +	/* in */
> +	__u32 cpu_id;
> +	__u32 cache_level;
> +	__u8  flags;
> +#define VFIO_TPH_MEM_TYPE_MASK		0x1
> +#define VFIO_TPH_MEM_TYPE_SHIFT		0
> +#define VFIO_TPH_MEM_TYPE_VMEM		0	/* Request volatile
> memory ST */
> +#define VFIO_TPH_MEM_TYPE_PMEM		1	/* Request
> persistent memory ST */
> +
> +#define VFIO_TPH_HINT_MASK		0x3
> +#define VFIO_TPH_HINT_SHIFT		1
> +#define VFIO_TPH_HINT_BIDIR		0
> +#define VFIO_TPH_HINT_REQSTR		(1 << VFIO_TPH_HINT_SHIFT)
> +#define VFIO_TPH_HINT_TARGET		(2 << VFIO_TPH_HINT_SHIFT)
> +#define VFIO_TPH_HINT_TARGET_PRIO	(3 << VFIO_TPH_HINT_SHIFT)
> +	__u16 index;			/* MSI-X/ST-table index to
> set ST */
> +	/* out */
> +	__u16 st;			/* Steering-Tag */
> +	__u8  ph_ignore;		/* Processing hint was
> ignored by */
> +	__s32 err;			/* Error on getting/setting
> Steering-Tag*/
> +};
> +
> +struct vfio_pci_tph {
> +	__u32 argsz;			/* Size of vfio_pci_tph and
> info[] */
> +	__u32 flags;
> +#define VFIO_DEVICE_FEATURE_TPH_OP_MASK		0x7
> +#define VFIO_DEVICE_FEATURE_TPH_OP_SHIFT	3
> +#define VFIO_DEVICE_FEATURE_TPH_ENABLE		0	/* Enable
> TPH on device */
> +#define VFIO_DEVICE_FEATURE_TPH_DISABLE	1	/* Disable
> TPH on device */
> +#define VFIO_DEVICE_FEATURE_TPH_GET_ST		2	/* Get
> steering-tags */
> +#define VFIO_DEVICE_FEATURE_TPH_SET_ST		4	/* Set
> steering-rags */
> +
> +#define	VFIO_TPH_ST_MODE_MASK	(0x3 <<
> VFIO_DEVICE_FEATURE_TPH_OP_SHIFT)
> +#define	VFIO_TPH_ST_NS_MODE	(0 <<
> VFIO_DEVICE_FEATURE_TPH_OP_SHIFT)
> +#define	VFIO_TPH_ST_IV_MODE	(1 <<
> VFIO_DEVICE_FEATURE_TPH_OP_SHIFT)
> +#define	VFIO_TPH_ST_DS_MODE	(2 <<
> VFIO_DEVICE_FEATURE_TPH_OP_SHIFT)
> +	__u32 count;				/* Number of entries
> in info[] */
> +	struct vfio_pci_tph_info info[];
> +#define VFIO_TPH_INFO_MAX	64		/* Max entries
> allowed in info[] */
> +};
> +
> +#define VFIO_DEVICE_FEATURE_PCI_TPH 11
> +
>  /* -------- API for Type1 VFIO IOMMU -------- */
>  
>  /**






[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux