IOMMU_CACHE means "normal DMA to this iommu_domain's IOVA should be cache coherent" and is used by the DMA API. The definition allows for special non-coherent DMA to exist - ie processing of the no-snoop flag in PCIe TLPs - so long as this behavior is opt-in by the device driver. The flag is used by the DMA API and is always available if dev_is_dma_coherent() is set. For Intel IOMMU IOMMU_CACHE was redefined to mean 'force all DMA to be cache coherent' which has the practical effect of causing the IOMMU to ignore the no-snoop bit in a PCIe TLP. x86 platforms are always IOMMU_CACHE, so Intel should ignore this flag. Instead use the new domain op enforce_cache_coherency() which causes every IOPTE created in the domain to have the no-snoop blocking behavior. Reconfigure VFIO to always use IOMMU_CACHE and call enforce_cache_coherency() to operate the special Intel behavior. Remove the IOMMU_CACHE test from Intel IOMMU. Ultimately VFIO plumbs the result of enforce_cache_coherency() back into the x86 platform code through kvm_arch_register_noncoherent_dma() which controls if the WBINVD instruction is available in the guest. No other arch implements kvm_arch_register_noncoherent_dma(). Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx> --- drivers/iommu/intel/iommu.c | 3 +-- drivers/vfio/vfio_iommu_type1.c | 30 +++++++++++++++++++----------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index f08611a6cc4799..0ca43671d934e9 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4422,8 +4422,7 @@ static int intel_iommu_map(struct iommu_domain *domain, prot |= DMA_PTE_READ; if (iommu_prot & IOMMU_WRITE) prot |= DMA_PTE_WRITE; - if (((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) || - dmar_domain->enforce_no_snoop) + if (dmar_domain->enforce_no_snoop) prot |= DMA_PTE_SNP; max_addr = iova + size; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 9394aa9444c10c..c13b9290e35759 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -84,8 +84,8 @@ struct vfio_domain { struct iommu_domain *domain; struct list_head next; struct list_head group_list; - int prot; /* IOMMU_CACHE */ - bool fgsp; /* Fine-grained super pages */ + bool fgsp : 1; /* Fine-grained super pages */ + bool enforce_cache_coherency : 1; }; struct vfio_dma { @@ -1461,7 +1461,7 @@ static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova, list_for_each_entry(d, &iommu->domain_list, next) { ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT, - npage << PAGE_SHIFT, prot | d->prot); + npage << PAGE_SHIFT, prot | IOMMU_CACHE); if (ret) goto unwind; @@ -1771,7 +1771,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, } ret = iommu_map(domain->domain, iova, phys, - size, dma->prot | domain->prot); + size, dma->prot | IOMMU_CACHE); if (ret) { if (!dma->iommu_mapped) { vfio_unpin_pages_remote(dma, iova, @@ -1859,7 +1859,7 @@ static void vfio_test_domain_fgsp(struct vfio_domain *domain) return; ret = iommu_map(domain->domain, 0, page_to_phys(pages), PAGE_SIZE * 2, - IOMMU_READ | IOMMU_WRITE | domain->prot); + IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE); if (!ret) { size_t unmapped = iommu_unmap(domain->domain, 0, PAGE_SIZE); @@ -2267,8 +2267,15 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, goto out_detach; } - if (iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY)) - domain->prot |= IOMMU_CACHE; + /* + * If the IOMMU can block non-coherent operations (ie PCIe TLPs with + * no-snoop set) then VFIO always turns this feature on because on Intel + * platforms it optimizes KVM to disable wbinvd emulation. + */ + if (domain->domain->ops->enforce_cache_coherency) + domain->enforce_cache_coherency = + domain->domain->ops->enforce_cache_coherency( + domain->domain); /* * Try to match an existing compatible domain. We don't want to @@ -2279,7 +2286,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, */ list_for_each_entry(d, &iommu->domain_list, next) { if (d->domain->ops == domain->domain->ops && - d->prot == domain->prot) { + d->enforce_cache_coherency == + domain->enforce_cache_coherency) { iommu_detach_group(domain->domain, group->iommu_group); if (!iommu_attach_group(d->domain, group->iommu_group)) { @@ -2611,14 +2619,14 @@ static void vfio_iommu_type1_release(void *iommu_data) kfree(iommu); } -static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu) +static int vfio_domains_have_enforce_cache_coherency(struct vfio_iommu *iommu) { struct vfio_domain *domain; int ret = 1; mutex_lock(&iommu->lock); list_for_each_entry(domain, &iommu->domain_list, next) { - if (!(domain->prot & IOMMU_CACHE)) { + if (!(domain->enforce_cache_coherency)) { ret = 0; break; } @@ -2641,7 +2649,7 @@ static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu, case VFIO_DMA_CC_IOMMU: if (!iommu) return 0; - return vfio_domains_have_iommu_cache(iommu); + return vfio_domains_have_enforce_cache_coherency(iommu); default: return 0; } -- 2.35.1