> From: Jason Gunthorpe <jgg@xxxxxxxxxx>
> Sent: Thursday, March 24, 2022 6:55 AM
>
> On Wed, Mar 23, 2022 at 05:34:18PM -0300, Jason Gunthorpe wrote:
>
> > Stated another way, any platform that wires dev_is_dma_coherent() to
> > true, like all x86 does, must support IOMMU_CACHE and report
> > IOMMU_CAP_CACHE_COHERENCY for every iommu_domain the platform
> > supports. The platform obviously declares it support this in order to
> > support the in-kernel DMA API.
>
> That gives me a nice simple idea:
>
> diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
> index 3c6b95ad026829..8366884df4a030 100644
> --- a/drivers/iommu/iommufd/device.c
> +++ b/drivers/iommu/iommufd/device.c
> @@ -8,6 +8,7 @@
>  #include <linux/pci.h>
>  #include <linux/irqdomain.h>
>  #include <linux/dma-iommu.h>
> +#include <linux/dma-map-ops.h>
>
>  #include "iommufd_private.h"
>
> @@ -61,6 +62,10 @@ struct iommufd_device *iommufd_bind_pci_device(int fd, struct pci_dev *pdev,
>  	struct iommu_group *group;
>  	int rc;
>
> +	/* iommufd always uses IOMMU_CACHE */
> +	if (!dev_is_dma_coherent(&pdev->dev))
> +		return ERR_PTR(-EINVAL);
> +
>  	ictx = iommufd_fget(fd);
>  	if (!ictx)
>  		return ERR_PTR(-EINVAL);
> diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c
> index 48149988c84bbc..3d6df1ffbf93e6 100644
> --- a/drivers/iommu/iommufd/ioas.c
> +++ b/drivers/iommu/iommufd/ioas.c
> @@ -129,7 +129,8 @@ static int conv_iommu_prot(u32 map_flags)
>  	 * We provide no manual cache coherency ioctls to userspace and most
>  	 * architectures make the CPU ops for cache flushing privileged.
>  	 * Therefore we require the underlying IOMMU to support CPU coherent
> -	 * operation.
> +	 * operation. Support for IOMMU_CACHE is enforced by the
> +	 * dev_is_dma_coherent() test during bind.
>  	 */
>  	iommu_prot = IOMMU_CACHE;
>  	if (map_flags & IOMMU_IOAS_MAP_WRITEABLE)
>
> Looking at it I would say all the places that test
> IOMMU_CAP_CACHE_COHERENCY can be replaced with dev_is_dma_coherent()
> except for the one call in VFIO that is supporting the Intel no-snoop
> behavior.
>
> Then we can rename IOMMU_CAP_CACHE_COHERENCY to something like
> IOMMU_CAP_ENFORCE_CACHE_COHERENCY and just create a
> IOMMU_ENFORCE_CACHE prot flag for Intel IOMMU to use instead of
> abusing IOMMU_CACHE.

Based on that, here is a quick tweak of the force-snoop part (not compiled).
Several notes:

- IOMMU_CAP_CACHE_COHERENCY is kept as it's checked in vfio's group
  attach interface. Removing it may require a group_is_dma_coherent()
  helper (see the sketch after these notes);

- vdpa is not changed, as force-snoop is only needed for integrated GPUs
  today, which are not managed by vdpa. But adding snoop support is easy
  if necessary;

- vfio type1 reports the force-snoop fact to KVM via VFIO_DMA_CC_IOMMU.
  For iommufd the compat layer may leverage that interface, but more
  thought is required on whether it can be reused for non-compat usage
  or whether a new interface is required between iommufd and KVM. Per
  earlier discussions Paolo prefers to reuse.
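On the first note, such a helper could simply require every device in the
group to be coherent. A minimal untested sketch; the name
group_is_dma_coherent() is hypothetical, built only on the existing
iommu_group_for_each_dev() and dev_is_dma_coherent():

static int __dev_is_dma_coherent(struct device *dev, void *data)
{
	/* return non-zero to stop iteration at the first non-coherent device */
	return dev_is_dma_coherent(dev) ? 0 : -ENODEV;
}

static bool group_is_dma_coherent(struct iommu_group *group)
{
	/* the group is coherent only if no device fails the per-device test */
	return !iommu_group_for_each_dev(group, NULL,
					 __dev_is_dma_coherent);
}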
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 5b196cf..06cca04 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -5110,7 +5110,8 @@ static int intel_iommu_map(struct iommu_domain *domain,
 		prot |= DMA_PTE_READ;
 	if (iommu_prot & IOMMU_WRITE)
 		prot |= DMA_PTE_WRITE;
-	if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
+	/* nothing to do for IOMMU_CACHE */
+	if ((iommu_prot & IOMMU_SNOOP) && dmar_domain->iommu_snooping)
 		prot |= DMA_PTE_SNP;

 	max_addr = iova + size;
@@ -5236,6 +5237,8 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
 static bool intel_iommu_capable(enum iommu_cap cap)
 {
 	if (cap == IOMMU_CAP_CACHE_COHERENCY)
+		return true;
+	if (cap == IOMMU_CAP_FORCE_SNOOP)
 		return domain_update_iommu_snooping(NULL);
 	if (cap == IOMMU_CAP_INTR_REMAP)
 		return irq_remapping_enabled == 1;
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 9394aa9..abc4cfe 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2270,6 +2270,9 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 	if (iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
 		domain->prot |= IOMMU_CACHE;

+	if (iommu_capable(bus, IOMMU_CAP_FORCE_SNOOP))
+		domain->prot |= IOMMU_SNOOP;
+
 	/*
 	 * Try to match an existing compatible domain.  We don't want to
 	 * preclude an IOMMU driver supporting multiple bus_types and being
@@ -2611,14 +2614,14 @@ static void vfio_iommu_type1_release(void *iommu_data)
 	kfree(iommu);
 }

-static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
+static int vfio_domains_have_iommu_snoop(struct vfio_iommu *iommu)
 {
 	struct vfio_domain *domain;
 	int ret = 1;

 	mutex_lock(&iommu->lock);
 	list_for_each_entry(domain, &iommu->domain_list, next) {
-		if (!(domain->prot & IOMMU_CACHE)) {
+		if (!(domain->prot & IOMMU_SNOOP)) {
 			ret = 0;
 			break;
 		}
@@ -2641,7 +2644,7 @@ static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu,
 	case VFIO_DMA_CC_IOMMU:
 		if (!iommu)
 			return 0;
-		return vfio_domains_have_iommu_cache(iommu);
+		return vfio_domains_have_iommu_snoop(iommu);
 	default:
 		return 0;
 	}
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index de0c57a..45184d7 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -21,6 +21,8 @@
 #define IOMMU_CACHE	(1 << 2) /* DMA cache coherency */
 #define IOMMU_NOEXEC	(1 << 3)
 #define IOMMU_MMIO	(1 << 4) /* e.g. things like MSI doorbells */
+#define IOMMU_SNOOP	(1 << 5) /* force DMA to snoop */
+
 /*
  * Where the bus hardware includes a privilege level as part of its access type
  * markings, and certain devices are capable of issuing transactions marked as
@@ -106,6 +108,8 @@ enum iommu_cap {
 					   transactions */
 	IOMMU_CAP_INTR_REMAP,		/* IOMMU supports interrupt isolation */
 	IOMMU_CAP_NOEXEC,		/* IOMMU_NOEXEC flag */
+	IOMMU_CAP_FORCE_SNOOP,		/* IOMMU forces all transactions to
+					   snoop cache */
 };

 /* These are the possible reserved region types */

Thanks
Kevin