Hi Thomas, I am trying all three parts of this work out with some experimental code within the IDXD driver that attempts to use IMS on the host. In the test, pci_ims_alloc_irq() always encounters -EBUSY and it seems that there is an attempt to insert the struct msi_desc into the xarray twice, the second attempt encountering the -EBUSY. While trying to understand what is going on I found myself looking at this code area and I'll annotate this patch with what I learned. On 11/11/2022 5:58 AM, Thomas Gleixner wrote: ... > --- a/kernel/irq/msi.c > +++ b/kernel/irq/msi.c > @@ -39,6 +39,7 @@ static inline int msi_sysfs_create_group > /* Invalid XA index which is outside of any searchable range */ > #define MSI_XA_MAX_INDEX (ULONG_MAX - 1) > #define MSI_XA_DOMAIN_SIZE (MSI_MAX_INDEX + 1) > +#define MSI_ANY_INDEX UINT_MAX > > static inline void msi_setup_default_irqdomain(struct device *dev, struct msi_device_data *md) > { > @@ -126,18 +127,34 @@ static int msi_insert_desc(struct device When calling pci_ims_alloc_irq(), msi_insert_desc() ends up being called twice, first with index = MSI_ANY_INDEX, second with index = 0. (domid = 1 both times) > } > > hwsize = msi_domain_get_hwsize(dev, domid); > - if (index >= hwsize) { > - ret = -ERANGE; > - goto fail; > - } > > - desc->msi_index = index; > - index += baseidx; > - ret = xa_insert(&md->__store, index, desc, GFP_KERNEL); > - if (ret) > - goto fail; > - return 0; > + if (index == MSI_ANY_INDEX) { > + struct xa_limit limit; > + unsigned int index; > + > + limit.min = baseidx; > + limit.max = baseidx + hwsize - 1; > > + /* Let the xarray allocate a free index within the limits */ > + ret = xa_alloc(&md->__store, &index, desc, limit, GFP_KERNEL); > + if (ret) > + goto fail; > + This path (index == MSI_ANY_INDEX) is followed when msi_insert_desc() is called the first time and the xa_alloc() succeeds at index 65536. > + desc->msi_index = index; This is problematic with desc->msi_index being a u16, assigning 65536 to it becomes 0. > + return 0; > + } else { > + if (index >= hwsize) { > + ret = -ERANGE; > + goto fail; > + } > + > + desc->msi_index = index; > + index += baseidx; > + ret = xa_insert(&md->__store, index, desc, GFP_KERNEL); > + if (ret) > + goto fail; This "else" path is followed when msi_insert_desc() is called the second time with "index = 0". The xa_insert() above fails at index 65536 (baseidx = 65536) with -EBUSY, trickling up as the return code to pci_ims_alloc_irq(). > + return 0; > + } > fail: > msi_free_desc(desc); > return ret; > @@ -335,7 +352,7 @@ int msi_setup_device_data(struct device > > msi_setup_default_irqdomain(dev, md); > > - xa_init(&md->__store); > + xa_init_flags(&md->__store, XA_FLAGS_ALLOC); > mutex_init(&md->mutex); > md->__iter_idx = MSI_XA_MAX_INDEX; > dev->msi.data = md; > @@ -1423,6 +1440,72 @@ int msi_domain_alloc_irqs_all_locked(str > return msi_domain_alloc_locked(dev, &ctrl); > } > > +/** > + * msi_domain_alloc_irq_at - Allocate an interrupt from a MSI interrupt domain at > + * a given index - or at the next free index > + * > + * @dev: Pointer to device struct of the device for which the interrupts > + * are allocated > + * @domid: Id of the interrupt domain to operate on > + * @index: Index for allocation. If @index == %MSI_ANY_INDEX the allocation > + * uses the next free index. > + * @affdesc: Optional pointer to an interrupt affinity descriptor structure > + * @cookie: Optional pointer to a descriptor specific cookie to be stored > + * in msi_desc::data. Must be NULL for MSI-X allocations > + * > + * This requires a MSI interrupt domain which lets the core code manage the > + * MSI descriptors. > + * > + * Return: struct msi_map > + * > + * On success msi_map::index contains the allocated index number and > + * msi_map::virq the corresponding Linux interrupt number > + * > + * On failure msi_map::index contains the error code and msi_map::virq > + * is %0. > + */ > +struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, unsigned int index, > + const struct irq_affinity_desc *affdesc, > + union msi_dev_cookie *cookie) > +{ > + struct irq_domain *domain; > + struct msi_map map = { }; > + struct msi_desc *desc; > + int ret; > + > + msi_lock_descs(dev); > + domain = msi_get_device_domain(dev, domid); > + if (!domain) { > + map.index = -ENODEV; > + goto unlock; > + } > + > + desc = msi_alloc_desc(dev, 1, affdesc); > + if (!desc) { > + map.index = -ENOMEM; > + goto unlock; > + } > + > + if (cookie) > + desc->data.cookie = *cookie; > + > + ret = msi_insert_desc(dev, desc, domid, index); > + if (ret) { > + map.index = ret; > + goto unlock; > + } Above is the first call to msi_insert_desc(/* index = MSI_ANY_INDEX */) > + > + map.index = desc->msi_index; msi_insert_desc() did attempt to set desc->msi_index to 65536 but map.index ends up being 0. > + ret = msi_domain_alloc_irqs_range_locked(dev, domid, map.index, map.index); Here is where the second call to msi_insert_desc() originates: msi_domain_alloc_irqs_range_locked() -> msi_domain_alloc_locked() -> \ __msi_domain_alloc_locked() -> msi_domain_alloc_simple_msi_descs() -> \ msi_domain_add_simple_msi_descs() -> msi_insert_desc() > + if (ret) > + map.index = ret; > + else > + map.virq = desc->irq; > +unlock: > + msi_unlock_descs(dev); > + return map; > +} > + > static void __msi_domain_free_irqs(struct device *dev, struct irq_domain *domain, > struct msi_ctrl *ctrl) > { > Reinette