Re: [PATCH v5 2/2] vfio/cdx: add interrupt support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, 26 Mar 2024 09:38:04 +0530
Nipun Gupta <nipun.gupta@xxxxxxx> wrote:

> Support the following ioctls for CDX devices:
> - VFIO_DEVICE_GET_IRQ_INFO
> - VFIO_DEVICE_SET_IRQS
> 
> This allows user to set an eventfd for cdx device interrupts and
> trigger this interrupt eventfd from userspace.
> All CDX device interrupts are MSIs. The MSIs are allocated from the
> CDX-MSI domain.
> 
> Signed-off-by: Nipun Gupta <nipun.gupta@xxxxxxx>
> Reviewed-by: Pieter Jansen van Vuuren <pieter.jansen-van-vuuren@xxxxxxx>
> ---
> 
> Changes v4->v5:
> - Rebased on 6.9-rc1
> 
> Changes v3->v4:
> - Added VFIO_IRQ_INFO_NORESIZE flag
> 
> Changes v2->v3:
> - Use generic MSI alloc/free APIs instead of CDX MSI APIs
> - Rebased on Linux 6.8-rc6
> 
> Changes v1->v2:
> - Rebased on Linux 6.7-rc1
> 
>  drivers/vfio/cdx/Makefile  |   2 +-
>  drivers/vfio/cdx/intr.c    | 211 +++++++++++++++++++++++++++++++++++++
>  drivers/vfio/cdx/main.c    |  60 ++++++++++-
>  drivers/vfio/cdx/private.h |  18 ++++
>  4 files changed, 289 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/vfio/cdx/intr.c
> 
> diff --git a/drivers/vfio/cdx/Makefile b/drivers/vfio/cdx/Makefile
> index cd4a2e6fe609..df92b320122a 100644
> --- a/drivers/vfio/cdx/Makefile
> +++ b/drivers/vfio/cdx/Makefile
> @@ -5,4 +5,4 @@
>  
>  obj-$(CONFIG_VFIO_CDX) += vfio-cdx.o
>  
> -vfio-cdx-objs := main.o
> +vfio-cdx-objs := main.o intr.o
> diff --git a/drivers/vfio/cdx/intr.c b/drivers/vfio/cdx/intr.c
> new file mode 100644
> index 000000000000..4637b57d0242
> --- /dev/null
> +++ b/drivers/vfio/cdx/intr.c
> @@ -0,0 +1,211 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2022-2023, Advanced Micro Devices, Inc.
> + */
> +
> +#include <linux/vfio.h>
> +#include <linux/slab.h>
> +#include <linux/types.h>
> +#include <linux/eventfd.h>
> +#include <linux/msi.h>
> +#include <linux/interrupt.h>
> +
> +#include "linux/cdx/cdx_bus.h"
> +#include "private.h"
> +
> +static irqreturn_t vfio_cdx_msihandler(int irq_no, void *arg)
> +{
> +	struct eventfd_ctx *trigger = arg;
> +
> +	eventfd_signal(trigger);
> +	return IRQ_HANDLED;
> +}
> +
> +static int vfio_cdx_msi_enable(struct vfio_cdx_device *vdev, int nvec)
> +{
> +	struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
> +	struct device *dev = vdev->vdev.dev;
> +	int msi_idx, ret;
> +
> +	vdev->cdx_irqs = kcalloc(nvec, sizeof(struct vfio_cdx_irq), GFP_KERNEL);
> +	if (!vdev->cdx_irqs)
> +		return -ENOMEM;
> +
> +	ret = cdx_enable_msi(cdx_dev);
> +	if (ret) {
> +		kfree(vdev->cdx_irqs);
> +		return ret;
> +	}
> +
> +	/* Allocate cdx MSIs */
> +	ret = msi_domain_alloc_irqs(dev, MSI_DEFAULT_DOMAIN, nvec);
> +	if (ret) {
> +		cdx_disable_msi(cdx_dev);
> +		kfree(vdev->cdx_irqs);
> +		return ret;
> +	}
> +
> +	for (msi_idx = 0; msi_idx < nvec; msi_idx++)
> +		vdev->cdx_irqs[msi_idx].irq_no = msi_get_virq(dev, msi_idx);
> +
> +	vdev->irq_count = nvec;
> +	vdev->config_msi = 1;
> +
> +	return 0;
> +}
> +
> +static int vfio_cdx_msi_set_vector_signal(struct vfio_cdx_device *vdev,
> +					  int vector, int fd)
> +{
> +	struct eventfd_ctx *trigger;
> +	int irq_no, ret;
> +
> +	if (vector < 0 || vector >= vdev->irq_count)
> +		return -EINVAL;
> +
> +	irq_no = vdev->cdx_irqs[vector].irq_no;
> +
> +	if (vdev->cdx_irqs[vector].trigger) {
> +		free_irq(irq_no, vdev->cdx_irqs[vector].trigger);
> +		kfree(vdev->cdx_irqs[vector].name);
> +		eventfd_ctx_put(vdev->cdx_irqs[vector].trigger);
> +		vdev->cdx_irqs[vector].trigger = NULL;
> +	}
> +
> +	if (fd < 0)
> +		return 0;
> +
> +	vdev->cdx_irqs[vector].name = kasprintf(GFP_KERNEL, "vfio-msi[%d](%s)",
> +						vector, dev_name(vdev->vdev.dev));
> +	if (!vdev->cdx_irqs[vector].name)
> +		return -ENOMEM;
> +
> +	trigger = eventfd_ctx_fdget(fd);
> +	if (IS_ERR(trigger)) {
> +		kfree(vdev->cdx_irqs[vector].name);
> +		return PTR_ERR(trigger);
> +	}
> +
> +	ret = request_irq(irq_no, vfio_cdx_msihandler, 0,
> +			  vdev->cdx_irqs[vector].name, trigger);
> +	if (ret) {
> +		kfree(vdev->cdx_irqs[vector].name);
> +		eventfd_ctx_put(trigger);
> +		return ret;
> +	}
> +
> +	vdev->cdx_irqs[vector].trigger = trigger;
> +
> +	return 0;
> +}
> +
> +static int vfio_cdx_msi_set_block(struct vfio_cdx_device *vdev,
> +				  unsigned int start, unsigned int count,
> +				  int32_t *fds)
> +{
> +	int i, j, ret = 0;
> +
> +	if (start >= vdev->irq_count || start + count > vdev->irq_count)
> +		return -EINVAL;
> +
> +	for (i = 0, j = start; i < count && !ret; i++, j++) {
> +		int fd = fds ? fds[i] : -1;
> +
> +		ret = vfio_cdx_msi_set_vector_signal(vdev, j, fd);
> +	}
> +
> +	if (ret) {
> +		for (--j; j >= (int)start; j--)
> +			vfio_cdx_msi_set_vector_signal(vdev, j, -1);
> +	}
> +
> +	return ret;
> +}
> +
> +static void vfio_cdx_msi_disable(struct vfio_cdx_device *vdev)
> +{
> +	struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
> +	struct device *dev = vdev->vdev.dev;
> +
> +	vfio_cdx_msi_set_block(vdev, 0, vdev->irq_count, NULL);
> +
> +	if (!vdev->config_msi)
> +		return;
> +
> +	msi_domain_free_irqs_all(dev, MSI_DEFAULT_DOMAIN);
> +	cdx_disable_msi(cdx_dev);
> +	kfree(vdev->cdx_irqs);
> +
> +	vdev->cdx_irqs = NULL;
> +	vdev->irq_count = 0;
> +	vdev->config_msi = 0;
> +}
> +
> +static int vfio_cdx_set_msi_trigger(struct vfio_cdx_device *vdev,
> +				    unsigned int index, unsigned int start,
> +				    unsigned int count, u32 flags,
> +				    void *data)
> +{
> +	struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
> +	int i;
> +
> +	if (start + count > cdx_dev->num_msi)
> +		return -EINVAL;
> +
> +	if (!count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
> +		vfio_cdx_msi_disable(vdev);
> +		return 0;
> +	}
> +
> +	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
> +		s32 *fds = data;
> +		int ret;
> +
> +		if (vdev->config_msi)
> +			return vfio_cdx_msi_set_block(vdev, start, count,
> +						  fds);
> +		ret = vfio_cdx_msi_enable(vdev, cdx_dev->num_msi);
> +		if (ret)
> +			return ret;
> +
> +		ret = vfio_cdx_msi_set_block(vdev, start, count, fds);
> +		if (ret)
> +			vfio_cdx_msi_disable(vdev);
> +
> +		return ret;
> +	}
> +
> +	for (i = start; i < start + count; i++) {
> +		if (!vdev->cdx_irqs[i].trigger)
> +			continue;
> +		if (flags & VFIO_IRQ_SET_DATA_NONE)
> +			eventfd_signal(vdev->cdx_irqs[i].trigger);

Typically DATA_BOOL support is also added here.

> +	}
> +
> +	return 0;
> +}
> +
> +int vfio_cdx_set_irqs_ioctl(struct vfio_cdx_device *vdev,
> +			    u32 flags, unsigned int index,
> +			    unsigned int start, unsigned int count,
> +			    void *data)
> +{
> +	if (flags & VFIO_IRQ_SET_ACTION_TRIGGER)
> +		return vfio_cdx_set_msi_trigger(vdev, index, start,
> +			  count, flags, data);
> +	else
> +		return -EINVAL;
> +}
> +
> +/* Free All IRQs for the given device */
> +void vfio_cdx_irqs_cleanup(struct vfio_cdx_device *vdev)
> +{
> +	/*
> +	 * Device does not support any interrupt or the interrupts
> +	 * were not configured
> +	 */
> +	if (!vdev->cdx_irqs)
> +		return;
> +
> +	vfio_cdx_set_msi_trigger(vdev, 1, 0, 0, VFIO_IRQ_SET_DATA_NONE, NULL);

@index is passed as 1 here.  AFAICT only index zero is supported.  The
SET_IRQS ioctl path catches this in
vfio_set_irqs_validate_and_prepare() but it might cause some strange
behavior here if another index were ever added.

> +}
> diff --git a/drivers/vfio/cdx/main.c b/drivers/vfio/cdx/main.c
> index 9cff8d75789e..f0861a38ae10 100644
> --- a/drivers/vfio/cdx/main.c
> +++ b/drivers/vfio/cdx/main.c
> @@ -61,6 +61,7 @@ static void vfio_cdx_close_device(struct vfio_device *core_vdev)
>  
>  	kfree(vdev->regions);
>  	cdx_dev_reset(core_vdev->dev);
> +	vfio_cdx_irqs_cleanup(vdev);
>  }
>  
>  static int vfio_cdx_bm_ctrl(struct vfio_device *core_vdev, u32 flags,
> @@ -123,7 +124,7 @@ static int vfio_cdx_ioctl_get_info(struct vfio_cdx_device *vdev,
>  	info.flags |= VFIO_DEVICE_FLAGS_RESET;
>  
>  	info.num_regions = cdx_dev->res_count;
> -	info.num_irqs = 0;
> +	info.num_irqs = cdx_dev->num_msi ? 1 : 0;
>  
>  	return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
>  }
> @@ -152,6 +153,59 @@ static int vfio_cdx_ioctl_get_region_info(struct vfio_cdx_device *vdev,
>  	return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
>  }
>  
> +static int vfio_cdx_ioctl_get_irq_info(struct vfio_cdx_device *vdev,
> +				       struct vfio_irq_info __user *arg)
> +{
> +	unsigned long minsz = offsetofend(struct vfio_irq_info, count);
> +	struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
> +	struct vfio_irq_info info;
> +
> +	if (copy_from_user(&info, arg, minsz))
> +		return -EFAULT;
> +
> +	if (info.argsz < minsz)
> +		return -EINVAL;
> +
> +	if (info.index >= 1)
> +		return -EINVAL;
> +
> +	info.flags = VFIO_IRQ_INFO_EVENTFD | VFIO_IRQ_INFO_NORESIZE;
> +	info.count = cdx_dev->num_msi;

This should return -EINVAL if cdx_dev->num_msi is zero.

> +
> +	return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
> +}
> +
> +static int vfio_cdx_ioctl_set_irqs(struct vfio_cdx_device *vdev,
> +				   struct vfio_irq_set __user *arg)
> +{
> +	unsigned long minsz = offsetofend(struct vfio_irq_set, count);
> +	struct cdx_device *cdx_dev = to_cdx_device(vdev->vdev.dev);
> +	struct vfio_irq_set hdr;
> +	size_t data_size = 0;
> +	u8 *data = NULL;
> +	int ret = 0;
> +
> +	if (copy_from_user(&hdr, arg, minsz))
> +		return -EFAULT;
> +
> +	ret = vfio_set_irqs_validate_and_prepare(&hdr, cdx_dev->num_msi,
> +						 1, &data_size);
> +	if (ret)
> +		return ret;
> +
> +	if (data_size) {
> +		data = memdup_user(arg->data, data_size);
> +		if (IS_ERR(data))
> +			return PTR_ERR(data);
> +	}
> +
> +	ret = vfio_cdx_set_irqs_ioctl(vdev, hdr.flags, hdr.index,
> +				      hdr.start, hdr.count, data);
> +	kfree(data);
> +
> +	return ret;
> +}
> +
>  static long vfio_cdx_ioctl(struct vfio_device *core_vdev,
>  			   unsigned int cmd, unsigned long arg)
>  {
> @@ -164,6 +218,10 @@ static long vfio_cdx_ioctl(struct vfio_device *core_vdev,
>  		return vfio_cdx_ioctl_get_info(vdev, uarg);
>  	case VFIO_DEVICE_GET_REGION_INFO:
>  		return vfio_cdx_ioctl_get_region_info(vdev, uarg);
> +	case VFIO_DEVICE_GET_IRQ_INFO:
> +		return vfio_cdx_ioctl_get_irq_info(vdev, uarg);
> +	case VFIO_DEVICE_SET_IRQS:
> +		return vfio_cdx_ioctl_set_irqs(vdev, uarg);
>  	case VFIO_DEVICE_RESET:
>  		return cdx_dev_reset(core_vdev->dev);
>  	default:
> diff --git a/drivers/vfio/cdx/private.h b/drivers/vfio/cdx/private.h
> index 8e9d25913728..7a8477ae4652 100644
> --- a/drivers/vfio/cdx/private.h
> +++ b/drivers/vfio/cdx/private.h
> @@ -13,6 +13,14 @@ static inline u64 vfio_cdx_index_to_offset(u32 index)
>  	return ((u64)(index) << VFIO_CDX_OFFSET_SHIFT);
>  }
>  
> +struct vfio_cdx_irq {
> +	u32			flags;
> +	u32			count;
> +	int			irq_no;
> +	struct eventfd_ctx	*trigger;
> +	char			*name;
> +};
> +
>  struct vfio_cdx_region {
>  	u32			flags;
>  	u32			type;
> @@ -25,6 +33,16 @@ struct vfio_cdx_device {
>  	struct vfio_cdx_region	*regions;
>  	u32			flags;
>  #define BME_SUPPORT BIT(0)
> +	struct vfio_cdx_irq	*cdx_irqs;
> +	u32			irq_count;
> +	u32			config_msi;
>  };

You might want to reorder these to avoid holes in the data structures.
vfio_cdx_irq will have a 4byte hole in the middle, vfio_cdx_device will
have a 4byte hole after flags.  config_msi is used as a bool, I'm not
sure why it's defined as a u32.  irq_count also holds a value up to 1,
so a u32 might be oversized.

There's clearly latent support here for devices with multiple MSI
vectors, is this just copying vfio-pci-core or will CDX devices have
multiple MSIs?  Thanks,

Alex

>  
> +int vfio_cdx_set_irqs_ioctl(struct vfio_cdx_device *vdev,
> +			    u32 flags, unsigned int index,
> +			    unsigned int start, unsigned int count,
> +			    void *data);
> +
> +void vfio_cdx_irqs_cleanup(struct vfio_cdx_device *vdev);
> +
>  #endif /* VFIO_CDX_PRIVATE_H */





[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux