This is V3 of the second part of the effort to provide support for per device MSI interrupt domains. Version 2 of this second part can be found here: https://lore.kernel.org/all/20221121083210.309161925@xxxxxxxxxxxxx The first part is available here: https://lore.kernel.org/all/20221111120501.026511281@xxxxxxxxxxxxx and has been merged into: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core This part on top of the tip irq/core branch is also available here: git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git devmsi-v3-part2 This part is the main preparation step for per device MSI domains: 1) Introduce irqdomain pointer storage space in device::msi::data and related helpers. 2) Convert interfaces to handle multiple per device MSI domains based on a domain ID 3) Provide new interfaces for allocation/free which are domain ID based and provide range allocation/free which is a prerequisite for post MSI-X enable alloc/free. 4) Switch all existing call sites of msi allocation/free interfaces over to the new interfaces 5) Remove the old interfaces Changes vs. V2: - Rework the xarray storage into per domain storage (Marc) @Kevin: I did not pick up your Reviewed-by tag due to these changes. The delta patch vs. V2 is attached below. Thanks, tglx --- diff --git a/include/linux/msi.h b/include/linux/msi.h index af9437ee4d82..43b8866c8431 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -174,24 +174,31 @@ enum msi_desc_filter { MSI_DESC_ASSOCIATED, }; + +/** + * struct msi_dev_domain - The internals of MSI domain info per device + * @store: Xarray for storing MSI descriptor pointers + * @irqdomain: Pointer to a per device interrupt domain + */ +struct msi_dev_domain { + struct xarray store; + struct irq_domain *domain; +}; + /** * msi_device_data - MSI per device data * @properties: MSI properties which are interesting to drivers * @platform_data: Platform-MSI specific data * @mutex: Mutex protecting the MSI descriptor store - * @__store: Xarray for storing MSI descriptor pointers + * @__domains: Internal data for per device MSI domains * @__iter_idx: Index to search the next entry for iterators - * @__iter_max: Index to limit the search - * @__irqdomains: Per device interrupt domains */ struct msi_device_data { unsigned long properties; struct platform_msi_priv_data *platform_data; struct mutex mutex; - struct xarray __store; + struct msi_dev_domain __domains[MSI_MAX_DEVICE_IRQDOMAINS]; unsigned long __iter_idx; - unsigned long __iter_max; - struct irq_domain *__irqdomains[MSI_MAX_DEVICE_IRQDOMAINS]; }; int msi_setup_device_data(struct device *dev); @@ -219,7 +226,8 @@ static inline struct msi_desc *msi_first_desc(struct device *dev, return msi_domain_first_desc(dev, MSI_DEFAULT_DOMAIN, filter); } -struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter); +struct msi_desc *msi_next_desc(struct device *dev, unsigned int domid, + enum msi_desc_filter filter); /** * msi_domain_for_each_desc - Iterate the MSI descriptors in a specific domain @@ -236,7 +244,7 @@ struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter); */ #define msi_domain_for_each_desc(desc, dev, domid, filter) \ for ((desc) = msi_domain_first_desc((dev), (domid), (filter)); (desc); \ - (desc) = msi_next_desc((dev), (filter))) + (desc) = msi_next_desc((dev), (domid), (filter))) /** * msi_for_each_desc - Iterate the MSI descriptors in the default irqdomain diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index c98c07ae46c9..695801b3cb68 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -34,37 +34,13 @@ struct msi_ctrl { unsigned int nirqs; }; -static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl); -static inline int msi_sysfs_create_group(struct device *dev); - -/* Invalid XA index which is outside of any searchable range */ +/* Invalid Xarray index which is outside of any searchable range */ #define MSI_XA_MAX_INDEX (ULONG_MAX - 1) +/* The maximum domain size */ #define MSI_XA_DOMAIN_SIZE (MSI_MAX_INDEX + 1) -static inline void msi_setup_default_irqdomain(struct device *dev, struct msi_device_data *md) -{ - if (!dev->msi.domain) - return; - /* - * If @dev::msi::domain is a global MSI domain, copy the pointer - * into the domain array to avoid conditionals all over the place. - */ - if (!irq_domain_is_msi_parent(dev->msi.domain)) - md->__irqdomains[MSI_DEFAULT_DOMAIN] = dev->msi.domain; -} - -static int msi_get_domain_base_index(struct device *dev, unsigned int domid) -{ - lockdep_assert_held(&dev->msi.data->mutex); - - if (WARN_ON_ONCE(domid >= MSI_MAX_DEVICE_IRQDOMAINS)) - return -ENODEV; - - if (WARN_ON_ONCE(!dev->msi.data->__irqdomains[domid])) - return -ENODEV; - - return domid * MSI_XA_DOMAIN_SIZE; -} +static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl); +static inline int msi_sysfs_create_group(struct device *dev); /** @@ -104,27 +80,16 @@ static void msi_free_desc(struct msi_desc *desc) kfree(desc); } -static int msi_insert_desc(struct device *dev, struct msi_desc *desc, +static int msi_insert_desc(struct msi_device_data *md, struct msi_desc *desc, unsigned int domid, unsigned int index) { - struct msi_device_data *md = dev->msi.data; - int baseidx, ret; - - baseidx = msi_get_domain_base_index(dev, domid); - if (baseidx < 0) { - ret = baseidx; - goto fail; - } + struct xarray *xa = &md->__domains[domid].store; + int ret; desc->msi_index = index; - index += baseidx; - ret = xa_insert(&md->__store, index, desc, GFP_KERNEL); + ret = xa_insert(xa, index, desc, GFP_KERNEL); if (ret) - goto fail; - return 0; - -fail: - msi_free_desc(desc); + msi_free_desc(desc); return ret; } @@ -152,7 +117,7 @@ int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid, /* Copy type specific data to the new descriptor. */ desc->pci = init_desc->pci; - return msi_insert_desc(dev, desc, domid, init_desc->msi_index); + return msi_insert_desc(dev->msi.data, desc, domid, init_desc->msi_index); } static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter) @@ -169,9 +134,11 @@ static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter) return false; } -static bool msi_ctrl_range_valid(struct device *dev, struct msi_ctrl *ctrl) +static bool msi_ctrl_valid(struct device *dev, struct msi_ctrl *ctrl) { - if (WARN_ON_ONCE(ctrl->first > ctrl->last || + if (WARN_ON_ONCE(ctrl->domid >= MSI_MAX_DEVICE_IRQDOMAINS || + !dev->msi.data->__domains[ctrl->domid].domain || + ctrl->first > ctrl->last || ctrl->first > MSI_MAX_INDEX || ctrl->last > MSI_MAX_INDEX)) return false; @@ -180,21 +147,17 @@ static bool msi_ctrl_range_valid(struct device *dev, struct msi_ctrl *ctrl) static void msi_domain_free_descs(struct device *dev, struct msi_ctrl *ctrl) { - struct xarray *xa = &dev->msi.data->__store; struct msi_desc *desc; + struct xarray *xa; unsigned long idx; - int base; lockdep_assert_held(&dev->msi.data->mutex); - if (!msi_ctrl_range_valid(dev, ctrl)) - return; - - base = msi_get_domain_base_index(dev, ctrl->domid); - if (base < 0) + if (!msi_ctrl_valid(dev, ctrl)) return; - xa_for_each_range(xa, idx, desc, ctrl->first + base, ctrl->last + base) { + xa = &dev->msi.data->__domains[ctrl->domid].store; + xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) { xa_erase(xa, idx); /* Leak the descriptor when it is still referenced */ @@ -234,22 +197,18 @@ static int msi_domain_add_simple_msi_descs(struct device *dev, struct msi_ctrl * { struct msi_desc *desc; unsigned int idx; - int ret, baseidx; + int ret; lockdep_assert_held(&dev->msi.data->mutex); - if (!msi_ctrl_range_valid(dev, ctrl)) + if (!msi_ctrl_valid(dev, ctrl)) return -EINVAL; - baseidx = msi_get_domain_base_index(dev, ctrl->domid); - if (baseidx < 0) - return baseidx; - for (idx = ctrl->first; idx <= ctrl->last; idx++) { desc = msi_alloc_desc(dev, 1, NULL); if (!desc) goto fail_mem; - ret = msi_insert_desc(dev, desc, ctrl->domid, idx); + ret = msi_insert_desc(dev->msi.data, desc, ctrl->domid, idx); if (ret) goto fail; } @@ -278,9 +237,12 @@ EXPORT_SYMBOL_GPL(get_cached_msi_msg); static void msi_device_data_release(struct device *dev, void *res) { struct msi_device_data *md = res; + int i; - WARN_ON_ONCE(!xa_empty(&md->__store)); - xa_destroy(&md->__store); + for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++) { + WARN_ON_ONCE(!xa_empty(&md->__domains[i].store)); + xa_destroy(&md->__domains[i].store); + } dev->msi.data = NULL; } @@ -297,7 +259,7 @@ static void msi_device_data_release(struct device *dev, void *res) int msi_setup_device_data(struct device *dev) { struct msi_device_data *md; - int ret; + int ret, i; if (dev->msi.data) return 0; @@ -312,11 +274,19 @@ int msi_setup_device_data(struct device *dev) return ret; } - msi_setup_default_irqdomain(dev, md); + for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++) + xa_init(&md->__domains[i].store); + + /* + * If @dev::msi::domain is set and is a global MSI domain, copy the + * pointer into the domain array so all code can operate on domain + * ids. The NULL pointer check is required to keep the legacy + * architecture specific PCI/MSI support working. + */ + if (dev->msi.domain && !irq_domain_is_msi_parent(dev->msi.domain)) + md->__domains[MSI_DEFAULT_DOMAIN].domain = dev->msi.domain; - xa_init(&md->__store); mutex_init(&md->mutex); - md->__iter_idx = MSI_XA_MAX_INDEX; dev->msi.data = md; devres_add(dev, md); return 0; @@ -338,17 +308,19 @@ EXPORT_SYMBOL_GPL(msi_lock_descs); */ void msi_unlock_descs(struct device *dev) { - /* Invalidate the index wich was cached by the iterator */ + /* Invalidate the index which was cached by the iterator */ dev->msi.data->__iter_idx = MSI_XA_MAX_INDEX; mutex_unlock(&dev->msi.data->mutex); } EXPORT_SYMBOL_GPL(msi_unlock_descs); -static struct msi_desc *msi_find_desc(struct msi_device_data *md, enum msi_desc_filter filter) +static struct msi_desc *msi_find_desc(struct msi_device_data *md, unsigned int domid, + enum msi_desc_filter filter) { + struct xarray *xa = &md->__domains[domid].store; struct msi_desc *desc; - xa_for_each_range(&md->__store, md->__iter_idx, desc, md->__iter_idx, md->__iter_max) { + xa_for_each_start(xa, md->__iter_idx, desc, md->__iter_idx) { if (msi_desc_match(desc, filter)) return desc; } @@ -372,26 +344,21 @@ struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid, enum msi_desc_filter filter) { struct msi_device_data *md = dev->msi.data; - int baseidx; - if (WARN_ON_ONCE(!md)) + if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS)) return NULL; lockdep_assert_held(&md->mutex); - baseidx = msi_get_domain_base_index(dev, domid); - if (baseidx < 0) - return NULL; - - md->__iter_idx = baseidx; - md->__iter_max = baseidx + MSI_MAX_INDEX; - return msi_find_desc(md, filter); + md->__iter_idx = 0; + return msi_find_desc(md, domid, filter); } EXPORT_SYMBOL_GPL(msi_domain_first_desc); /** * msi_next_desc - Get the next MSI descriptor of a device * @dev: Device to operate on + * @domid: The id of the interrupt domain which should be walked. * @filter: Descriptor state filter * * The first invocation of msi_next_desc() has to be preceeded by a @@ -402,20 +369,21 @@ EXPORT_SYMBOL_GPL(msi_domain_first_desc); * Return: Pointer to the next MSI descriptor matching the search * criteria, NULL if none found. */ -struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter) +struct msi_desc *msi_next_desc(struct device *dev, unsigned int domid, + enum msi_desc_filter filter) { struct msi_device_data *md = dev->msi.data; - if (WARN_ON_ONCE(!md)) + if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS)) return NULL; lockdep_assert_held(&md->mutex); - if (md->__iter_idx >= md->__iter_max) + if (md->__iter_idx >= (unsigned long)MSI_MAX_INDEX) return NULL; md->__iter_idx++; - return msi_find_desc(md, filter); + return msi_find_desc(md, domid, filter); } EXPORT_SYMBOL_GPL(msi_next_desc); @@ -432,12 +400,12 @@ unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigne struct msi_desc *desc; unsigned int ret = 0; bool pcimsi = false; - int base; + struct xarray *xa; if (!dev->msi.data) return 0; - if (WARN_ON_ONCE(index > MSI_MAX_INDEX)) + if (WARN_ON_ONCE(index > MSI_MAX_INDEX || domid >= MSI_MAX_DEVICE_IRQDOMAINS)) return 0; /* This check is only valid for the PCI default MSI domain */ @@ -445,13 +413,8 @@ unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigne pcimsi = to_pci_dev(dev)->msi_enabled; msi_lock_descs(dev); - - base = msi_get_domain_base_index(dev, domid); - if (base < 0) - goto unlock; - - base += pcimsi ? 0 : index; - desc = xa_load(&dev->msi.data->__store, base); + xa = &dev->msi.data->__domains[domid].store; + desc = xa_load(xa, pcimsi ? 0 : index); if (desc && desc->irq) { /* * PCI-MSI has only one descriptor for multiple interrupts. @@ -466,7 +429,6 @@ unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigne } } -unlock: msi_unlock_descs(dev); return ret; } @@ -595,7 +557,7 @@ static struct irq_domain *msi_get_device_domain(struct device *dev, unsigned int if (WARN_ON_ONCE(domid >= MSI_MAX_DEVICE_IRQDOMAINS)) return NULL; - domain = dev->msi.data->__irqdomains[domid]; + domain = dev->msi.data->__domains[domid].domain; if (!domain) return NULL; @@ -846,9 +808,10 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev, .last = virq_base + nvec - 1, }; struct msi_desc *desc; + struct xarray *xa; int ret, virq; - if (!msi_ctrl_range_valid(dev, &ctrl)) + if (!msi_ctrl_valid(dev, &ctrl)) return -EINVAL; msi_lock_descs(dev); @@ -856,8 +819,10 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev, if (ret) goto unlock; + xa = &dev->msi.data->__domains[ctrl.domid].store; + for (virq = virq_base; virq < virq_base + nvec; virq++) { - desc = xa_load(&dev->msi.data->__store, virq); + desc = xa_load(xa, virq); desc->irq = virq; ops->set_desc(arg, desc); @@ -987,8 +952,8 @@ static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflag static int __msi_domain_alloc_irqs(struct device *dev, struct irq_domain *domain, struct msi_ctrl *ctrl) { + struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store; struct msi_domain_info *info = domain->host_data; - struct xarray *xa = &dev->msi.data->__store; struct msi_domain_ops *ops = info->ops; unsigned int vflags = 0, allocated = 0; msi_alloc_info_t arg = { }; @@ -1074,7 +1039,7 @@ static int __msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl) struct irq_domain *domain; int ret; - if (!msi_ctrl_range_valid(dev, ctrl)) + if (!msi_ctrl_valid(dev, ctrl)) return -EINVAL; domain = msi_get_device_domain(dev, ctrl->domid); @@ -1180,16 +1145,14 @@ int msi_domain_alloc_irqs_all_locked(struct device *dev, unsigned int domid, int static void __msi_domain_free_irqs(struct device *dev, struct irq_domain *domain, struct msi_ctrl *ctrl) { + struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store; struct msi_domain_info *info = domain->host_data; - struct xarray *xa = &dev->msi.data->__store; struct irq_data *irqd; struct msi_desc *desc; unsigned long idx; - int i, base; - - base = ctrl->domid * MSI_XA_DOMAIN_SIZE; + int i; - xa_for_each_range(xa, idx, desc, ctrl->first + base, ctrl->last + base) { + xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) { /* Only handle MSI entries which have an interrupt associated */ if (!msi_desc_match(desc, MSI_DESC_ASSOCIATED)) continue; @@ -1214,7 +1177,7 @@ static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl) struct msi_domain_ops *ops; struct irq_domain *domain; - if (!msi_ctrl_range_valid(dev, ctrl)) + if (!msi_ctrl_valid(dev, ctrl)) return; domain = msi_get_device_domain(dev, ctrl->domid);