Re: [PATCH v2 6/7] vfio/platform: Create persistent IRQ handlers

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Alex,

On 3/9/24 00:05, Alex Williamson wrote:
> The vfio-platform SET_IRQS ioctl currently allows loopback triggering of
> an interrupt before a signaling eventfd has been configured by the user,
> which thereby allows a NULL pointer dereference.
>
> Rather than register the IRQ relative to a valid trigger, register all
> IRQs in a disabled state in the device open path.  This allows mask
> operations on the IRQ to nest within the overall enable state governed
> by a valid eventfd signal.  This decouples @masked, protected by the
> @locked spinlock from @trigger, protected via the @igate mutex.
>
> In doing so, it's guaranteed that changes to @trigger cannot race the
> IRQ handlers because the IRQ handler is synchronously disabled before
> modifying the trigger, and loopback triggering of the IRQ via ioctl is
> safe due to serialization with trigger changes via igate.
>
> For compatibility, request_irq() failures are maintained to be local to
> the SET_IRQS ioctl rather than a fatal error in the open device path.
> This allows, for example, a userspace driver with polling mode support
> to continue to work regardless of moving the request_irq() call site.
> This necessarily blocks all SET_IRQS access to the failed index.
>
> Cc: Eric Auger <eric.auger@xxxxxxxxxx>
> Cc: stable@xxxxxxxxxxxxxxx
> Fixes: 57f972e2b341 ("vfio/platform: trigger an interrupt via eventfd")
> Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx>
Tested-by: Eric Auger <eric.auger@xxxxxxxxxx>

Thanks

Eric

> ---
>  drivers/vfio/platform/vfio_platform_irq.c | 100 +++++++++++++++-------
>  1 file changed, 68 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/vfio/platform/vfio_platform_irq.c b/drivers/vfio/platform/vfio_platform_irq.c
> index e5dcada9e86c..ef41ecef83af 100644
> --- a/drivers/vfio/platform/vfio_platform_irq.c
> +++ b/drivers/vfio/platform/vfio_platform_irq.c
> @@ -136,6 +136,16 @@ static int vfio_platform_set_irq_unmask(struct vfio_platform_device *vdev,
>  	return 0;
>  }
>  
> +/*
> + * The trigger eventfd is guaranteed valid in the interrupt path
> + * and protected by the igate mutex when triggered via ioctl.
> + */
> +static void vfio_send_eventfd(struct vfio_platform_irq *irq_ctx)
> +{
> +	if (likely(irq_ctx->trigger))
> +		eventfd_signal(irq_ctx->trigger);
> +}
> +
>  static irqreturn_t vfio_automasked_irq_handler(int irq, void *dev_id)
>  {
>  	struct vfio_platform_irq *irq_ctx = dev_id;
> @@ -155,7 +165,7 @@ static irqreturn_t vfio_automasked_irq_handler(int irq, void *dev_id)
>  	spin_unlock_irqrestore(&irq_ctx->lock, flags);
>  
>  	if (ret == IRQ_HANDLED)
> -		eventfd_signal(irq_ctx->trigger);
> +		vfio_send_eventfd(irq_ctx);
>  
>  	return ret;
>  }
> @@ -164,52 +174,40 @@ static irqreturn_t vfio_irq_handler(int irq, void *dev_id)
>  {
>  	struct vfio_platform_irq *irq_ctx = dev_id;
>  
> -	eventfd_signal(irq_ctx->trigger);
> +	vfio_send_eventfd(irq_ctx);
>  
>  	return IRQ_HANDLED;
>  }
>  
>  static int vfio_set_trigger(struct vfio_platform_device *vdev, int index,
> -			    int fd, irq_handler_t handler)
> +			    int fd)
>  {
>  	struct vfio_platform_irq *irq = &vdev->irqs[index];
>  	struct eventfd_ctx *trigger;
> -	int ret;
>  
>  	if (irq->trigger) {
> -		irq_clear_status_flags(irq->hwirq, IRQ_NOAUTOEN);
> -		free_irq(irq->hwirq, irq);
> -		kfree(irq->name);
> +		disable_irq(irq->hwirq);
>  		eventfd_ctx_put(irq->trigger);
>  		irq->trigger = NULL;
>  	}
>  
>  	if (fd < 0) /* Disable only */
>  		return 0;
> -	irq->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-irq[%d](%s)",
> -			      irq->hwirq, vdev->name);
> -	if (!irq->name)
> -		return -ENOMEM;
>  
>  	trigger = eventfd_ctx_fdget(fd);
> -	if (IS_ERR(trigger)) {
> -		kfree(irq->name);
> +	if (IS_ERR(trigger))
>  		return PTR_ERR(trigger);
> -	}
>  
>  	irq->trigger = trigger;
>  
> -	irq_set_status_flags(irq->hwirq, IRQ_NOAUTOEN);
> -	ret = request_irq(irq->hwirq, handler, 0, irq->name, irq);
> -	if (ret) {
> -		kfree(irq->name);
> -		eventfd_ctx_put(trigger);
> -		irq->trigger = NULL;
> -		return ret;
> -	}
> -
> -	if (!irq->masked)
> -		enable_irq(irq->hwirq);
> +	/*
> +	 * irq->masked effectively provides nested disables within the overall
> +	 * enable relative to trigger.  Specifically request_irq() is called
> +	 * with NO_AUTOEN, therefore the IRQ is initially disabled.  The user
> +	 * may only further disable the IRQ with a MASK operations because
> +	 * irq->masked is initially false.
> +	 */
> +	enable_irq(irq->hwirq);
>  
>  	return 0;
>  }
> @@ -228,7 +226,7 @@ static int vfio_platform_set_irq_trigger(struct vfio_platform_device *vdev,
>  		handler = vfio_irq_handler;
>  
>  	if (!count && (flags & VFIO_IRQ_SET_DATA_NONE))
> -		return vfio_set_trigger(vdev, index, -1, handler);
> +		return vfio_set_trigger(vdev, index, -1);
>  
>  	if (start != 0 || count != 1)
>  		return -EINVAL;
> @@ -236,7 +234,7 @@ static int vfio_platform_set_irq_trigger(struct vfio_platform_device *vdev,
>  	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
>  		int32_t fd = *(int32_t *)data;
>  
> -		return vfio_set_trigger(vdev, index, fd, handler);
> +		return vfio_set_trigger(vdev, index, fd);
>  	}
>  
>  	if (flags & VFIO_IRQ_SET_DATA_NONE) {
> @@ -260,6 +258,14 @@ int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev,
>  		    unsigned start, unsigned count, uint32_t flags,
>  		    void *data) = NULL;
>  
> +	/*
> +	 * For compatibility, errors from request_irq() are local to the
> +	 * SET_IRQS path and reflected in the name pointer.  This allows,
> +	 * for example, polling mode fallback for an exclusive IRQ failure.
> +	 */
> +	if (IS_ERR(vdev->irqs[index].name))
> +		return PTR_ERR(vdev->irqs[index].name);
> +
>  	switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
>  	case VFIO_IRQ_SET_ACTION_MASK:
>  		func = vfio_platform_set_irq_mask;
> @@ -280,7 +286,7 @@ int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev,
>  
>  int vfio_platform_irq_init(struct vfio_platform_device *vdev)
>  {
> -	int cnt = 0, i;
> +	int cnt = 0, i, ret = 0;
>  
>  	while (vdev->get_irq(vdev, cnt) >= 0)
>  		cnt++;
> @@ -292,29 +298,54 @@ int vfio_platform_irq_init(struct vfio_platform_device *vdev)
>  
>  	for (i = 0; i < cnt; i++) {
>  		int hwirq = vdev->get_irq(vdev, i);
> +		irq_handler_t handler = vfio_irq_handler;
>  
> -		if (hwirq < 0)
> +		if (hwirq < 0) {
> +			ret = -EINVAL;
>  			goto err;
> +		}
>  
>  		spin_lock_init(&vdev->irqs[i].lock);
>  
>  		vdev->irqs[i].flags = VFIO_IRQ_INFO_EVENTFD;
>  
> -		if (irq_get_trigger_type(hwirq) & IRQ_TYPE_LEVEL_MASK)
> +		if (irq_get_trigger_type(hwirq) & IRQ_TYPE_LEVEL_MASK) {
>  			vdev->irqs[i].flags |= VFIO_IRQ_INFO_MASKABLE
>  						| VFIO_IRQ_INFO_AUTOMASKED;
> +			handler = vfio_automasked_irq_handler;
> +		}
>  
>  		vdev->irqs[i].count = 1;
>  		vdev->irqs[i].hwirq = hwirq;
>  		vdev->irqs[i].masked = false;
> +		vdev->irqs[i].name = kasprintf(GFP_KERNEL_ACCOUNT,
> +					       "vfio-irq[%d](%s)", hwirq,
> +					       vdev->name);
> +		if (!vdev->irqs[i].name) {
> +			ret = -ENOMEM;
> +			goto err;
> +		}
> +
> +		ret = request_irq(hwirq, handler, IRQF_NO_AUTOEN,
> +				  vdev->irqs[i].name, &vdev->irqs[i]);
> +		if (ret) {
> +			kfree(vdev->irqs[i].name);
> +			vdev->irqs[i].name = ERR_PTR(ret);
> +		}
>  	}
>  
>  	vdev->num_irqs = cnt;
>  
>  	return 0;
>  err:
> +	for (--i; i >= 0; i--) {
> +		if (!IS_ERR(vdev->irqs[i].name)) {
> +			free_irq(vdev->irqs[i].hwirq, &vdev->irqs[i]);
> +			kfree(vdev->irqs[i].name);
> +		}
> +	}
>  	kfree(vdev->irqs);
> -	return -EINVAL;
> +	return ret;
>  }
>  
>  void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev)
> @@ -324,7 +355,12 @@ void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev)
>  	for (i = 0; i < vdev->num_irqs; i++) {
>  		vfio_virqfd_disable(&vdev->irqs[i].mask);
>  		vfio_virqfd_disable(&vdev->irqs[i].unmask);
> -		vfio_set_trigger(vdev, i, -1, NULL);
> +		if (!IS_ERR(vdev->irqs[i].name)) {
> +			free_irq(vdev->irqs[i].hwirq, &vdev->irqs[i]);
> +			if (vdev->irqs[i].trigger)
> +				eventfd_ctx_put(vdev->irqs[i].trigger);
> +			kfree(vdev->irqs[i].name);
> +		}
>  	}
>  
>  	vdev->num_irqs = 0;





[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux