Hi Alex,

On 3/9/24 00:05, Alex Williamson wrote:
> The vfio-platform SET_IRQS ioctl currently allows loopback triggering of
> an interrupt before a signaling eventfd has been configured by the user,
> which thereby allows a NULL pointer dereference.
>
> Rather than register the IRQ relative to a valid trigger, register all
> IRQs in a disabled state in the device open path. This allows mask
> operations on the IRQ to nest within the overall enable state governed
> by a valid eventfd signal. This decouples @masked, protected by the
> @lock spinlock, from @trigger, protected via the @igate mutex.
>
> In doing so, it's guaranteed that changes to @trigger cannot race the
> IRQ handlers because the IRQ handler is synchronously disabled before
> modifying the trigger, and loopback triggering of the IRQ via ioctl is
> safe due to serialization with trigger changes via igate.
>
> For compatibility, request_irq() failures are maintained to be local to
> the SET_IRQS ioctl rather than a fatal error in the open device path.
> This allows, for example, a userspace driver with polling mode support
> to continue to work regardless of moving the request_irq() call site.
> This necessarily blocks all SET_IRQS access to the failed index.
>
> Cc: Eric Auger <eric.auger@xxxxxxxxxx>
> Cc: stable@xxxxxxxxxxxxxxx
> Fixes: 57f972e2b341 ("vfio/platform: trigger an interrupt via eventfd")
> Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx>

Tested-by: Eric Auger <eric.auger@xxxxxxxxxx>

Thanks

Eric

> ---
>  drivers/vfio/platform/vfio_platform_irq.c | 100 +++++++++++++++-------
>  1 file changed, 68 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/vfio/platform/vfio_platform_irq.c b/drivers/vfio/platform/vfio_platform_irq.c
> index e5dcada9e86c..ef41ecef83af 100644
> --- a/drivers/vfio/platform/vfio_platform_irq.c
> +++ b/drivers/vfio/platform/vfio_platform_irq.c
> @@ -136,6 +136,16 @@ static int vfio_platform_set_irq_unmask(struct vfio_platform_device *vdev,
>  	return 0;
>  }
>
> +/*
> + * The trigger eventfd is guaranteed valid in the interrupt path
> + * and protected by the igate mutex when triggered via ioctl.
> + */
> +static void vfio_send_eventfd(struct vfio_platform_irq *irq_ctx)
> +{
> +	if (likely(irq_ctx->trigger))
> +		eventfd_signal(irq_ctx->trigger);
> +}
> +
>  static irqreturn_t vfio_automasked_irq_handler(int irq, void *dev_id)
>  {
>  	struct vfio_platform_irq *irq_ctx = dev_id;
> @@ -155,7 +165,7 @@ static irqreturn_t vfio_automasked_irq_handler(int irq, void *dev_id)
>  	spin_unlock_irqrestore(&irq_ctx->lock, flags);
>
>  	if (ret == IRQ_HANDLED)
> -		eventfd_signal(irq_ctx->trigger);
> +		vfio_send_eventfd(irq_ctx);
>
>  	return ret;
>  }
> @@ -164,52 +174,40 @@ static irqreturn_t vfio_irq_handler(int irq, void *dev_id)
>  {
>  	struct vfio_platform_irq *irq_ctx = dev_id;
>
> -	eventfd_signal(irq_ctx->trigger);
> +	vfio_send_eventfd(irq_ctx);
>
>  	return IRQ_HANDLED;
>  }
>
>  static int vfio_set_trigger(struct vfio_platform_device *vdev, int index,
> -			    int fd, irq_handler_t handler)
> +			    int fd)
>  {
>  	struct vfio_platform_irq *irq = &vdev->irqs[index];
>  	struct eventfd_ctx *trigger;
> -	int ret;
>
>  	if (irq->trigger) {
> -		irq_clear_status_flags(irq->hwirq, IRQ_NOAUTOEN);
> -		free_irq(irq->hwirq, irq);
> -		kfree(irq->name);
> +		disable_irq(irq->hwirq);
>  		eventfd_ctx_put(irq->trigger);
>  		irq->trigger = NULL;
>  	}
>
>  	if (fd < 0) /* Disable only */
>  		return 0;
> -	irq->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-irq[%d](%s)",
> -			      irq->hwirq, vdev->name);
> -	if (!irq->name)
> -		return -ENOMEM;
>
>  	trigger = eventfd_ctx_fdget(fd);
> -	if (IS_ERR(trigger)) {
> -		kfree(irq->name);
> +	if (IS_ERR(trigger))
>  		return PTR_ERR(trigger);
> -	}
>
>  	irq->trigger = trigger;
>
> -	irq_set_status_flags(irq->hwirq, IRQ_NOAUTOEN);
> -	ret = request_irq(irq->hwirq, handler, 0, irq->name, irq);
> -	if (ret) {
> -		kfree(irq->name);
> -		eventfd_ctx_put(trigger);
> -		irq->trigger = NULL;
> -		return ret;
> -	}
> -
> -	if (!irq->masked)
> -		enable_irq(irq->hwirq);
> +	/*
> +	 * irq->masked effectively provides nested disables within the overall
> +	 * enable relative to trigger. Specifically request_irq() is called
> +	 * with NO_AUTOEN, therefore the IRQ is initially disabled. The user
> +	 * may only further disable the IRQ with a MASK operation because
> +	 * irq->masked is initially false.
> +	 */
> +	enable_irq(irq->hwirq);
>
>  	return 0;
>  }
> @@ -228,7 +226,7 @@ static int vfio_platform_set_irq_trigger(struct vfio_platform_device *vdev,
>  		handler = vfio_irq_handler;
>
>  	if (!count && (flags & VFIO_IRQ_SET_DATA_NONE))
> -		return vfio_set_trigger(vdev, index, -1, handler);
> +		return vfio_set_trigger(vdev, index, -1);
>
>  	if (start != 0 || count != 1)
>  		return -EINVAL;
>
> @@ -236,7 +234,7 @@ static int vfio_platform_set_irq_trigger(struct vfio_platform_device *vdev,
>  	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
>  		int32_t fd = *(int32_t *)data;
>
> -		return vfio_set_trigger(vdev, index, fd, handler);
> +		return vfio_set_trigger(vdev, index, fd);
>  	}
>
>  	if (flags & VFIO_IRQ_SET_DATA_NONE) {
> @@ -260,6 +258,14 @@ int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev,
>  			    unsigned start, unsigned count, uint32_t flags,
>  			    void *data) = NULL;
>
> +	/*
> +	 * For compatibility, errors from request_irq() are local to the
> +	 * SET_IRQS path and reflected in the name pointer. This allows,
> +	 * for example, polling mode fallback for an exclusive IRQ failure.
> +	 */
> +	if (IS_ERR(vdev->irqs[index].name))
> +		return PTR_ERR(vdev->irqs[index].name);
> +
>  	switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
>  	case VFIO_IRQ_SET_ACTION_MASK:
>  		func = vfio_platform_set_irq_mask;
> @@ -280,7 +286,7 @@ int vfio_platform_set_irqs_ioctl(struct vfio_platform_device *vdev,
>
>  int vfio_platform_irq_init(struct vfio_platform_device *vdev)
>  {
> -	int cnt = 0, i;
> +	int cnt = 0, i, ret = 0;
>
>  	while (vdev->get_irq(vdev, cnt) >= 0)
>  		cnt++;
> @@ -292,29 +298,54 @@ int vfio_platform_irq_init(struct vfio_platform_device *vdev)
>
>  	for (i = 0; i < cnt; i++) {
>  		int hwirq = vdev->get_irq(vdev, i);
> +		irq_handler_t handler = vfio_irq_handler;
>
> -		if (hwirq < 0)
> +		if (hwirq < 0) {
> +			ret = -EINVAL;
>  			goto err;
> +		}
>
>  		spin_lock_init(&vdev->irqs[i].lock);
>
>  		vdev->irqs[i].flags = VFIO_IRQ_INFO_EVENTFD;
>
> -		if (irq_get_trigger_type(hwirq) & IRQ_TYPE_LEVEL_MASK)
> +		if (irq_get_trigger_type(hwirq) & IRQ_TYPE_LEVEL_MASK) {
>  			vdev->irqs[i].flags |= VFIO_IRQ_INFO_MASKABLE
>  						| VFIO_IRQ_INFO_AUTOMASKED;
> +			handler = vfio_automasked_irq_handler;
> +		}
>
>  		vdev->irqs[i].count = 1;
>  		vdev->irqs[i].hwirq = hwirq;
>  		vdev->irqs[i].masked = false;
> +		vdev->irqs[i].name = kasprintf(GFP_KERNEL_ACCOUNT,
> +					       "vfio-irq[%d](%s)", hwirq,
> +					       vdev->name);
> +		if (!vdev->irqs[i].name) {
> +			ret = -ENOMEM;
> +			goto err;
> +		}
> +
> +		ret = request_irq(hwirq, handler, IRQF_NO_AUTOEN,
> +				  vdev->irqs[i].name, &vdev->irqs[i]);
> +		if (ret) {
> +			kfree(vdev->irqs[i].name);
> +			vdev->irqs[i].name = ERR_PTR(ret);
> +		}
>  	}
>
>  	vdev->num_irqs = cnt;
>
>  	return 0;
>  err:
> +	for (--i; i >= 0; i--) {
> +		if (!IS_ERR(vdev->irqs[i].name)) {
> +			free_irq(vdev->irqs[i].hwirq, &vdev->irqs[i]);
> +			kfree(vdev->irqs[i].name);
> +		}
> +	}
>  	kfree(vdev->irqs);
> -	return -EINVAL;
> +	return ret;
>  }
>
>  void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev)
> @@ -324,7 +355,12 @@ void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev)
>  	for (i = 0; i < vdev->num_irqs; i++) {
>  		vfio_virqfd_disable(&vdev->irqs[i].mask);
>  		vfio_virqfd_disable(&vdev->irqs[i].unmask);
> -		vfio_set_trigger(vdev, i, -1, NULL);
> +		if (!IS_ERR(vdev->irqs[i].name)) {
> +			free_irq(vdev->irqs[i].hwirq, &vdev->irqs[i]);
> +			if (vdev->irqs[i].trigger)
> +				eventfd_ctx_put(vdev->irqs[i].trigger);
> +			kfree(vdev->irqs[i].name);
> +		}
>  	}
>
>  	vdev->num_irqs = 0;
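
FWIW, the pre-patch NULL dereference is reachable purely from userspace
through the DATA_NONE loopback path of VFIO_DEVICE_SET_IRQS. Below is a
minimal sketch, assuming device_fd is an already-opened vfio platform
device fd; the function name and the choice of index 0 are illustrative
only, not part of the patch:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Loopback-trigger IRQ index 0 without ever configuring an eventfd. */
static int loopback_trigger(int device_fd)
{
	struct vfio_irq_set irq_set = {
		.argsz = sizeof(irq_set),
		.flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER,
		.index = 0,
		.start = 0,
		.count = 1,	/* count != 0 selects loopback, not disable */
	};

	/*
	 * Before this patch the IRQ handler called eventfd_signal() on a
	 * NULL trigger here; with it applied, vfio_send_eventfd() skips
	 * the signal until a valid eventfd has been configured.
	 */
	return ioctl(device_fd, VFIO_DEVICE_SET_IRQS, &irq_set);
}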