Hi Alex, On 3/30/2023 3:42 PM, Alex Williamson wrote: > On Thu, 30 Mar 2023 16:40:50 -0600 > Alex Williamson <alex.williamson@xxxxxxxxxx> wrote: > >> On Tue, 28 Mar 2023 14:53:34 -0700 >> Reinette Chatre <reinette.chatre@xxxxxxxxx> wrote: >> ... >>> diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c >>> index b3a258e58625..755b752ca17e 100644 >>> --- a/drivers/vfio/pci/vfio_pci_intrs.c >>> +++ b/drivers/vfio/pci/vfio_pci_intrs.c >>> @@ -55,6 +55,13 @@ struct vfio_pci_irq_ctx *vfio_irq_ctx_get(struct vfio_pci_core_device *vdev, >>> return xa_load(&vdev->ctx, index); >>> } >>> >>> +static void vfio_irq_ctx_free(struct vfio_pci_core_device *vdev, >>> + struct vfio_pci_irq_ctx *ctx, unsigned long index) >>> +{ >>> + xa_erase(&vdev->ctx, index); >>> + kfree(ctx); >>> +} > > Also, the function below should use this rather than open coding the > same now. Thanks, It should, yes. Thank you. Will do. >>> static void vfio_irq_ctx_free_all(struct vfio_pci_core_device *vdev) >>> { >>> struct vfio_pci_irq_ctx *ctx; >>> @@ -409,33 +416,62 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev, >>> { >>> struct pci_dev *pdev = vdev->pdev; >>> struct vfio_pci_irq_ctx *ctx; >>> + struct msi_map msix_map = {}; >>> + bool allow_dyn_alloc = false; >>> struct eventfd_ctx *trigger; >>> + bool new_ctx = false; >>> int irq, ret; >>> u16 cmd; >>> >>> + /* Only MSI-X allows dynamic allocation. */ >>> + if (msix && pci_msix_can_alloc_dyn(vdev->pdev)) >>> + allow_dyn_alloc = true; >> >> Should vfio-pci-core probe this and store it in a field on >> vfio_pci_core_device so that we can simply use something like >> vdev->has_dyn_msix throughout? It is not obvious to me if you mean this with vfio-pci-core probe, but it looks like a change to vfio_pci_core_enable() may be appropriate with a snippet like below: diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index a743b98ba29a..a474ce80a555 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -533,6 +533,8 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev) } else vdev->msix_bar = 0xFF; + vdev->has_dyn_msix = pci_msix_can_alloc_dyn(pdev); + if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev)) vdev->has_vga = true; Please do note that I placed it outside of the earlier "if (msix_pos)" since pci_msix_can_alloc_dyn() has its own "if (!dev->msix_cap)". If you prefer to keep all the vdev->*msix* together I can move it into the if statement. With vdev->has_dyn_msix available "allow_dyn_alloc" can be dropped as you stated. >> >>> + >>> ctx = vfio_irq_ctx_get(vdev, vector); >>> - if (!ctx) >>> + if (!ctx && !allow_dyn_alloc) >>> return -EINVAL; >>> + >>> irq = pci_irq_vector(pdev, vector); >>> + /* Context and interrupt are always allocated together. */ >>> + WARN_ON((ctx && irq == -EINVAL) || (!ctx && irq != -EINVAL)); >>> >>> - if (ctx->trigger) { >>> + if (ctx && ctx->trigger) { >>> irq_bypass_unregister_producer(&ctx->producer); >>> >>> cmd = vfio_pci_memory_lock_and_enable(vdev); >>> free_irq(irq, ctx->trigger); >>> + if (allow_dyn_alloc) { >> >> It almost seems easier to define msix_map in each scope that it's used: >> >> struct msi_map map = { .index = vector, >> .virq = irq }; >> Sure. Will do. >>> + msix_map.index = vector; >>> + msix_map.virq = irq; >>> + pci_msix_free_irq(pdev, msix_map); >>> + irq = -EINVAL; >>> + } >>> vfio_pci_memory_unlock_and_restore(vdev, cmd); >>> kfree(ctx->name); >>> eventfd_ctx_put(ctx->trigger); >>> ctx->trigger = NULL; >>> + if (allow_dyn_alloc) { >>> + vfio_irq_ctx_free(vdev, ctx, vector); >>> + ctx = NULL; >>> + } >>> } >>> >>> if (fd < 0) >>> return 0; >>> >>> + if (!ctx) { >>> + ctx = vfio_irq_ctx_alloc_single(vdev, vector); >>> + if (!ctx) >>> + return -ENOMEM; >>> + new_ctx = true; >>> + } >>> + >>> ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-msi%s[%d](%s)", >>> msix ? "x" : "", vector, pci_name(pdev)); >>> - if (!ctx->name) >>> - return -ENOMEM; >>> + if (!ctx->name) { >>> + ret = -ENOMEM; >>> + goto out_free_ctx; >>> + } >>> >>> trigger = eventfd_ctx_fdget(fd); >>> if (IS_ERR(trigger)) { >>> @@ -443,25 +479,38 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev, >>> goto out_free_name; >>> } >>> >>> - /* >>> - * The MSIx vector table resides in device memory which may be cleared >>> - * via backdoor resets. We don't allow direct access to the vector >>> - * table so even if a userspace driver attempts to save/restore around >>> - * such a reset it would be unsuccessful. To avoid this, restore the >>> - * cached value of the message prior to enabling. >>> - */ >>> cmd = vfio_pci_memory_lock_and_enable(vdev); >>> if (msix) { >>> - struct msi_msg msg; >>> - >>> - get_cached_msi_msg(irq, &msg); >>> - pci_write_msi_msg(irq, &msg); >>> + if (irq == -EINVAL) { >>> + msix_map = pci_msix_alloc_irq_at(pdev, vector, NULL); >> >> struct msi_map map = pci_msix_alloc_irq_at(pdev, >> vector, NULL); Will do. >>> + if (msix_map.index < 0) { >>> + vfio_pci_memory_unlock_and_restore(vdev, cmd); >>> + ret = msix_map.index; >>> + goto out_put_eventfd_ctx; >>> + } >>> + irq = msix_map.virq; >>> + } else { >>> + /* >>> + * The MSIx vector table resides in device memory which >>> + * may be cleared via backdoor resets. We don't allow >>> + * direct access to the vector table so even if a >>> + * userspace driver attempts to save/restore around >>> + * such a reset it would be unsuccessful. To avoid >>> + * this, restore the cached value of the message prior >>> + * to enabling. >>> + */ >> >> You've only just copied this comment down to here, but I think it's a >> bit stale. Maybe we should update it to something that helps explain >> this split better, maybe: >> >> /* >> * If the vector was previously allocated, refresh the >> * on-device message data before enabling in case it had >> * been cleared or corrupted since writing. >> */ >> >> IIRC, that was the purpose of writing it back to the device and the >> blocking of direct access is no longer accurate anyway. Thank you. Will do. To keep this patch focused I plan to separate this change into a new prep patch that will be placed earlier in this series. >> >>> + struct msi_msg msg; >>> + >>> + get_cached_msi_msg(irq, &msg); >>> + pci_write_msi_msg(irq, &msg); >>> + } >>> } >>> >>> ret = request_irq(irq, vfio_msihandler, 0, ctx->name, trigger); >>> - vfio_pci_memory_unlock_and_restore(vdev, cmd); >>> if (ret) >>> - goto out_put_eventfd_ctx; >>> + goto out_free_irq_locked; >>> + >>> + vfio_pci_memory_unlock_and_restore(vdev, cmd); >>> >>> ctx->producer.token = trigger; >>> ctx->producer.irq = irq; >>> @@ -477,11 +526,21 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev, >>> >>> return 0; >>> >>> +out_free_irq_locked: >>> + if (allow_dyn_alloc && new_ctx) { >> >> struct msi_map map = { .index = vector, >> .virq = irq }; >> Will do. >>> + msix_map.index = vector; >>> + msix_map.virq = irq; >>> + pci_msix_free_irq(pdev, msix_map); >>> + } >>> + vfio_pci_memory_unlock_and_restore(vdev, cmd); >>> out_put_eventfd_ctx: >>> eventfd_ctx_put(trigger); >>> out_free_name: >>> kfree(ctx->name); >>> ctx->name = NULL; >>> +out_free_ctx: >>> + if (allow_dyn_alloc && new_ctx) >>> + vfio_irq_ctx_free(vdev, ctx, vector); >>> return ret; >>> } >>> >> >> Do we really need the new_ctx test in the above cases? Thanks, new_ctx is not required for correctness but instead is used to keep the code symmetric. Specifically, if the user enables MSI-X without providing triggers and then later assign triggers then an error path without new_ctx would unwind more than done in this function, it would free the context that was allocated within vfio_msi_enable(). Reinette