IOMMU_CACHE means "normal DMA to this iommu_domain's IOVA should be cache
coherent" and is used by the DMA API. The definition allows for special
non-coherent DMA to exist - i.e. processing of the no-snoop flag in PCIe
TLPs - so long as this behavior is opt-in by the device driver.

The flag is mainly used by the DMA API to synchronize the IOMMU setting
with the expected cache behavior of the DMA master, e.g. based on
dev_is_dma_coherent() in some cases.

For the Intel IOMMU, IOMMU_CACHE was redefined to mean 'force all DMA to
be cache coherent', which has the practical effect of causing the IOMMU
to ignore the no-snoop bit in a PCIe TLP.

x86 platforms are always IOMMU_CACHE, so Intel should ignore this flag.

Instead use the new domain op enforce_cache_coherency(), which causes
every IOPTE created in the domain to have the no-snoop blocking behavior.

Reconfigure VFIO to always use IOMMU_CACHE and call
enforce_cache_coherency() to enable the special Intel behavior. Remove
the IOMMU_CACHE test from the Intel IOMMU.

Ultimately VFIO plumbs the result of enforce_cache_coherency() back into
the x86 platform code through kvm_arch_register_noncoherent_dma(), which
controls whether the WBINVD instruction is available in the guest. No
other archs implement kvm_arch_register_noncoherent_dma(), nor are there
any other known consumers of VFIO_DMA_CC_IOMMU that might be affected by
the user-visible result change on non-x86 archs.

Reviewed-by: Kevin Tian <kevin.tian@xxxxxxxxx>
Reviewed-by: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx>
Acked-by: Alex Williamson <alex.williamson@xxxxxxxxxx>
Acked-by: Robin Murphy <robin.murphy@xxxxxxx>
Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
---
 drivers/iommu/intel/iommu.c     |  7 ++-----
 drivers/vfio/vfio_iommu_type1.c | 30 +++++++++++++++++++-----------
 include/linux/intel-iommu.h     |  1 -
 3 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 8e8adecc6a434e..2332060e059c3d 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -641,7 +641,6 @@ static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
 static void domain_update_iommu_cap(struct dmar_domain *domain)
 {
 	domain_update_iommu_coherency(domain);
-	domain->iommu_snooping = domain_update_iommu_snooping(NULL);
 	domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
 
 	/*
@@ -4283,7 +4282,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
 	domain->agaw = width_to_agaw(adjust_width);
 
 	domain->iommu_coherency = false;
-	domain->iommu_snooping = false;
 	domain->iommu_superpage = 0;
 	domain->max_addr = 0;
 
@@ -4422,8 +4420,7 @@ static int intel_iommu_map(struct iommu_domain *domain,
 		prot |= DMA_PTE_READ;
 	if (iommu_prot & IOMMU_WRITE)
 		prot |= DMA_PTE_WRITE;
-	if (((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) ||
-	    dmar_domain->force_snooping)
+	if (dmar_domain->force_snooping)
 		prot |= DMA_PTE_SNP;
 
 	max_addr = iova + size;
@@ -4550,7 +4547,7 @@ static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain)
 {
 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
 
-	if (!dmar_domain->iommu_snooping)
+	if (!domain_update_iommu_snooping(NULL))
 		return false;
 	dmar_domain->force_snooping = true;
 	return true;
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 9394aa9444c10c..c13b9290e35759 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -84,8 +84,8 @@ struct vfio_domain {
 	struct iommu_domain	*domain;
 	struct list_head	next;
 	struct list_head	group_list;
-	int			prot;		/* IOMMU_CACHE */
-	bool			fgsp;		/* Fine-grained super pages */
+	bool			fgsp : 1;	/* Fine-grained super pages */
+	bool			enforce_cache_coherency : 1;
 };
 
 struct vfio_dma {
@@ -1461,7 +1461,7 @@ static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova,
 
 	list_for_each_entry(d, &iommu->domain_list, next) {
 		ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT,
-				npage << PAGE_SHIFT, prot | d->prot);
+				npage << PAGE_SHIFT, prot | IOMMU_CACHE);
 		if (ret)
 			goto unwind;
 
@@ -1771,7 +1771,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 			}
 
 			ret = iommu_map(domain->domain, iova, phys,
-					size, dma->prot | domain->prot);
+					size, dma->prot | IOMMU_CACHE);
 			if (ret) {
 				if (!dma->iommu_mapped) {
 					vfio_unpin_pages_remote(dma, iova,
@@ -1859,7 +1859,7 @@ static void vfio_test_domain_fgsp(struct vfio_domain *domain)
 		return;
 
 	ret = iommu_map(domain->domain, 0, page_to_phys(pages), PAGE_SIZE * 2,
-			IOMMU_READ | IOMMU_WRITE | domain->prot);
+			IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE);
 	if (!ret) {
 		size_t unmapped = iommu_unmap(domain->domain, 0, PAGE_SIZE);
 
@@ -2267,8 +2267,15 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 		goto out_detach;
 	}
 
-	if (iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
-		domain->prot |= IOMMU_CACHE;
+	/*
+	 * If the IOMMU can block non-coherent operations (ie PCIe TLPs with
+	 * no-snoop set) then VFIO always turns this feature on because on Intel
+	 * platforms it optimizes KVM to disable wbinvd emulation.
+	 */
+	if (domain->domain->ops->enforce_cache_coherency)
+		domain->enforce_cache_coherency =
+			domain->domain->ops->enforce_cache_coherency(
+				domain->domain);
 
 	/*
 	 * Try to match an existing compatible domain.  We don't want to
@@ -2279,7 +2286,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 	 */
 	list_for_each_entry(d, &iommu->domain_list, next) {
 		if (d->domain->ops == domain->domain->ops &&
-		    d->prot == domain->prot) {
+		    d->enforce_cache_coherency ==
+			    domain->enforce_cache_coherency) {
 			iommu_detach_group(domain->domain, group->iommu_group);
 			if (!iommu_attach_group(d->domain,
 						group->iommu_group)) {
@@ -2611,14 +2619,14 @@ static void vfio_iommu_type1_release(void *iommu_data)
 	kfree(iommu);
 }
 
-static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
+static int vfio_domains_have_enforce_cache_coherency(struct vfio_iommu *iommu)
 {
 	struct vfio_domain *domain;
 	int ret = 1;
 
 	mutex_lock(&iommu->lock);
 	list_for_each_entry(domain, &iommu->domain_list, next) {
-		if (!(domain->prot & IOMMU_CACHE)) {
+		if (!(domain->enforce_cache_coherency)) {
 			ret = 0;
 			break;
 		}
@@ -2641,7 +2649,7 @@ static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu,
 	case VFIO_DMA_CC_IOMMU:
 		if (!iommu)
 			return 0;
-		return vfio_domains_have_iommu_cache(iommu);
+		return vfio_domains_have_enforce_cache_coherency(iommu);
 	default:
 		return 0;
 	}
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 4c2baf2446c277..72e5d7900e717f 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -539,7 +539,6 @@ struct dmar_domain {
 
 	u8 has_iotlb_device: 1;
 	u8 iommu_coherency: 1;	/* indicate coherency of iommu access */
-	u8 iommu_snooping: 1;	/* indicate snooping control feature */
 	u8 force_snooping : 1;	/* Create IOPTEs with snoop control */
 
 	struct list_head devices;	/* all devices' list */
-- 
2.35.1
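
For readers following along, the snippet below is a minimal sketch of the
opt-in pattern the vfio_iommu_type1_attach_group() hunk above uses with
the new domain op. The helper name is hypothetical and is not part of
this patch; VFIO is currently the only caller:

/*
 * Hypothetical illustration (assumes <linux/iommu.h>): how an
 * iommu_domain owner can opt in to no-snoop blocking.
 */
static bool example_enforce_cache_coherency(struct iommu_domain *domain)
{
	/*
	 * The op is optional. When it is absent the domain cannot block
	 * non-coherent (no-snoop) TLPs, and the owner must assume such
	 * traffic bypasses the cache.
	 */
	if (!domain->ops->enforce_cache_coherency)
		return false;

	/*
	 * On Intel a true return sets dmar_domain->force_snooping, so
	 * every IOPTE created in the domain afterwards carries
	 * DMA_PTE_SNP.
	 */
	return domain->ops->enforce_cache_coherency(domain);
}

VFIO latches this result in vfio_domain->enforce_cache_coherency and
reports the aggregate across all domains through VFIO_DMA_CC_IOMMU,
which is what userspace feeds into kvm_arch_register_noncoherent_dma().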