Hi Nicolin,

On 1/11/25 4:32 AM, Nicolin Chen wrote:
> From: Jason Gunthorpe <jgg@xxxxxxxxxx>
>
> All the iommu cases simply want to override the MSI page's address with
those which translate MSIs
> the IOVA that was mapped through the iommu. This doesn't need a cookie
> pointer, we just need to store the IOVA and its page size in the msi_desc.
>
> Instead provide msi_desc_set_iommu_msi_iova() which allows the IOMMU side
> to specify the IOVA that the MSI page is placed during
> iommu_dma_prepare(). This is stored in the msi_desc and then
iommu_dma_prepare_msi()
> iommu_dma_compose_msi_msg() is a simple inline that sets address_hi/lo.
>
> The next patch will correct the naming.
>
> This is done because we cannot correctly lock access to group->domain in
> the atomic context that iommu_dma_compose_msi_msg() is called under. Today
> the locking miss is tolerable because dma_iommu.c operates under an
> assumption that the domain does not change while a driver is probed.
>
> However iommufd now permits the domain to change while the driver is
> probed and VFIO userspace can create races with IRQ changes calling
> iommu_dma_prepare/compose_msi_msg() and changing/freeing the iommu_domain.
And is it safe in iommu_dma_prepare_msi()?
>
> Removing the pointer, and critically, the call to
> iommu_get_domain_for_dev() during compose resolves this race.
> > Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx> > Signed-off-by: Nicolin Chen <nicolinc@xxxxxxxxxx> > --- > include/linux/iommu.h | 6 ------ > include/linux/msi.h | 45 +++++++++++++++++++++++---------------- > drivers/iommu/dma-iommu.c | 30 +++++--------------------- > 3 files changed, 32 insertions(+), 49 deletions(-) > > diff --git a/include/linux/iommu.h b/include/linux/iommu.h > index 318d27841130..3a4215966c1b 100644 > --- a/include/linux/iommu.h > +++ b/include/linux/iommu.h > @@ -1513,7 +1513,6 @@ static inline void iommu_debugfs_setup(void) {} > int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base); > > int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr); > -void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg); > > #else /* CONFIG_IOMMU_DMA */ > > @@ -1529,11 +1528,6 @@ static inline int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_a > { > return 0; > } > - > -static inline void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg) > -{ > -} > - > #endif /* CONFIG_IOMMU_DMA */ > > /* > diff --git a/include/linux/msi.h b/include/linux/msi.h > index b10093c4d00e..d442b4a69d56 100644 > --- a/include/linux/msi.h > +++ b/include/linux/msi.h > @@ -184,7 +184,8 @@ struct msi_desc { > struct msi_msg msg; > struct irq_affinity_desc *affinity; > #ifdef CONFIG_IRQ_MSI_IOMMU > - const void *iommu_cookie;
You may add kernel-doc comments above.
> + u64 iommu_msi_iova : 58; > + u64 iommu_msi_page_shift : 6; > #endif > #ifdef CONFIG_SYSFS > struct device_attribute *sysfs_attrs; > @@ -285,28 +286,36 @@ struct msi_desc *msi_next_desc(struct device *dev, unsigned int domid, > > #define msi_desc_to_dev(desc) ((desc)->dev) > > -#ifdef CONFIG_IRQ_MSI_IOMMU > -static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc) > -{ > - return desc->iommu_cookie; > -} > - > -static inline void msi_desc_set_iommu_cookie(struct msi_desc *desc, > - const void *iommu_cookie) > +static 
inline void msi_desc_set_iommu_msi_iova(struct msi_desc *desc, > + u64 msi_iova, > + unsigned int page_shift) > { > - desc->iommu_cookie = iommu_cookie; > -} > -#else > -static inline const void *msi_desc_get_iommu_cookie(struct msi_desc *desc) > -{ > - return NULL; > +#ifdef CONFIG_IRQ_MSI_IOMMU > + desc->iommu_msi_iova = msi_iova >> page_shift; > + desc->iommu_msi_page_shift = page_shift; > +#endif > } > > -static inline void msi_desc_set_iommu_cookie(struct msi_desc *desc, > - const void *iommu_cookie) > +/** > + * iommu_dma_compose_msi_msg() - Apply translation to an MSI message > + * @desc: MSI descriptor prepared by iommu_dma_prepare_msi() > + * @msg: MSI message containing target physical address > + */ > +static inline void iommu_dma_compose_msi_msg(struct msi_desc *desc, > + struct msi_msg *msg) > { > -} > +#ifdef CONFIG_IRQ_MSI_IOMMU > + if (desc->iommu_msi_page_shift) { > + u64 msi_iova = desc->iommu_msi_iova > + << desc->iommu_msi_page_shift; > + > + msg->address_hi = upper_32_bits(msi_iova); > + msg->address_lo = lower_32_bits(msi_iova) | > + (msg->address_lo & > + ((1 << desc->iommu_msi_page_shift) - 1)); > + } > #endif > +} > > int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid, > struct msi_desc *init_desc); > diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c > index 2a9fa0c8cc00..bf91e014d179 100644 > --- a/drivers/iommu/dma-iommu.c > +++ b/drivers/iommu/dma-iommu.c > @@ -1815,7 +1815,7 @@ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) > static DEFINE_MUTEX(msi_prepare_lock); /* see below */ > > if (!domain || !domain->iova_cookie) { > - desc->iommu_cookie = NULL; > + msi_desc_set_iommu_msi_iova(desc, 0, 0); > return 0; > } > > @@ -1827,33 +1827,13 @@ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) > mutex_lock(&msi_prepare_lock); > msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain); > mutex_unlock(&msi_prepare_lock); > - > - msi_desc_set_iommu_cookie(desc, 
msi_page); > - > if (!msi_page) > return -ENOMEM; > - return 0; > -} > > -/** > - * iommu_dma_compose_msi_msg() - Apply translation to an MSI message > - * @desc: MSI descriptor prepared by iommu_dma_prepare_msi() > - * @msg: MSI message containing target physical address > - */ > -void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg) > -{ > - struct device *dev = msi_desc_to_dev(desc); > - const struct iommu_domain *domain = iommu_get_domain_for_dev(dev); > - const struct iommu_dma_msi_page *msi_page; > - > - msi_page = msi_desc_get_iommu_cookie(desc); > - > - if (!domain || !domain->iova_cookie || WARN_ON(!msi_page)) > - return; > - > - msg->address_hi = upper_32_bits(msi_page->iova); > - msg->address_lo &= cookie_msi_granule(domain->iova_cookie) - 1; > - msg->address_lo += lower_32_bits(msi_page->iova); > + msi_desc_set_iommu_msi_iova( > + desc, msi_page->iova, > + ilog2(cookie_msi_granule(domain->iova_cookie))); > + return 0; > } > > static int iommu_dma_init(void)