> From: Nicolin Chen <nicolinc@xxxxxxxxxx> > Sent: Saturday, January 11, 2025 11:32 AM > > @@ -294,7 +294,9 @@ struct iommu_ioas_unmap { > > /** > * enum iommufd_option - ioctl(IOMMU_OPTION_RLIMIT_MODE) and > - * ioctl(IOMMU_OPTION_HUGE_PAGES) > + * ioctl(IOMMU_OPTION_HUGE_PAGES) and > + * ioctl(IOMMU_OPTION_SW_MSI_START) and > + * ioctl(IOMMU_OPTION_SW_MSI_SIZE) > * @IOMMU_OPTION_RLIMIT_MODE: > * Change how RLIMIT_MEMLOCK accounting works. The caller must have > privilege > * to invoke this. Value 0 (default) is user based accounting, 1 uses process > @@ -304,10 +306,24 @@ struct iommu_ioas_unmap { > * iommu mappings. Value 0 disables combining, everything is mapped to > * PAGE_SIZE. This can be useful for benchmarking. This is a per-IOAS > * option, the object_id must be the IOAS ID. > + * @IOMMU_OPTION_SW_MSI_START: > + * Change the base address of the IOMMU mapping region for MSI > doorbell(s). > + * It must be set this before attaching a device to an IOAS/HWPT, remove 'this' > otherwise > + * this option will be not effective on that IOAS/HWPT. User can Do we want to explicitly check this instead of leaving it no effect silently? > choose to > + * let kernel pick a base address, by simply ignoring this option or setting > + * a value 0 to IOMMU_OPTION_SW_MSI_SIZE. Global option, object_id > must be 0 > + * @IOMMU_OPTION_SW_MSI_SIZE: > + * Change the size of the IOMMU mapping region for MSI doorbell(s). It > must > + * be set this before attaching a device to an IOAS/HWPT, otherwise it > won't > + * be effective on that IOAS/HWPT. The value is in MB, and the minimum > value > + * is 1 MB. A value 0 (default) will invalidate the MSI doorbell base address > + * value set to IOMMU_OPTION_SW_MSI_START. Global option, object_id > must be 0 hmm there is no check on the minimal value and enable the effect of value 0 in this patch. > iommufd_device_attach_reserved_iova(struct iommufd_device *idev, > struct iommufd_hwpt_paging > *hwpt_paging) > { > + struct iommufd_ctx *ictx = idev->ictx; > int rc; > > lockdep_assert_held(&idev->igroup->lock); > > + /* Override it with a user-programmed SW_MSI region */ > + if (ictx->sw_msi_size && ictx->sw_msi_start != PHYS_ADDR_MAX) > + idev->igroup->sw_msi_start = ictx->sw_msi_start; > rc = iopt_table_enforce_dev_resv_regions(&hwpt_paging->ioas->iopt, > idev->dev, > &idev->igroup- > >sw_msi_start); what about moving above additions into iopt_table_enforce_dev_resv_regions() which is all about finding a sw_msi address and can check the user setting internally? > diff --git a/drivers/iommu/iommufd/io_pagetable.c > b/drivers/iommu/iommufd/io_pagetable.c > index 8a790e597e12..5d7f5ca1eecf 100644 > --- a/drivers/iommu/iommufd/io_pagetable.c > +++ b/drivers/iommu/iommufd/io_pagetable.c > @@ -1446,7 +1446,9 @@ int iopt_table_enforce_dev_resv_regions(struct > io_pagetable *iopt, > if (sw_msi_start && resv->type == IOMMU_RESV_MSI) > num_hw_msi++; > if (sw_msi_start && resv->type == IOMMU_RESV_SW_MSI) { > - *sw_msi_start = resv->start; > + /* Bypass the driver-defined SW_MSI region, if preset > */ > + if (*sw_msi_start == PHYS_ADDR_MAX) > + *sw_msi_start = resv->start; the code is not about bypass. Instead it's to use the driver-defined region if user doesn't set it.