On Thu, 7 Apr 2022 12:23:45 -0300
Jason Gunthorpe <jgg@xxxxxxxxxx> wrote:

> IOMMU_CACHE means "normal DMA to this iommu_domain's IOVA should be cache
> coherent" and is used by the DMA API. The definition allows for special
> non-coherent DMA to exist - ie processing of the no-snoop flag in PCIe
> TLPs - so long as this behavior is opt-in by the device driver.
>
> The flag is mainly used by the DMA API to synchronize the IOMMU setting
> with the expected cache behavior of the DMA master, eg based on
> dev_is_dma_coherent() in some cases.
>
> For the Intel IOMMU, IOMMU_CACHE was redefined to mean 'force all DMA to
> be cache coherent', which has the practical effect of causing the IOMMU
> to ignore the no-snoop bit in a PCIe TLP.
>
> x86 platforms are always IOMMU_CACHE, so Intel should ignore this flag.
>
> Instead use the new domain op enforce_cache_coherency() which causes
> every IOPTE created in the domain to have the no-snoop blocking behavior.
>
> Reconfigure VFIO to always use IOMMU_CACHE and call
> enforce_cache_coherency() to enable the special Intel behavior.
>
> Remove the IOMMU_CACHE test from Intel IOMMU.
>
> Ultimately VFIO plumbs the result of enforce_cache_coherency() back into
> the x86 platform code through kvm_arch_register_noncoherent_dma(), which
> controls whether the WBINVD instruction is available in the guest. No
> other arch implements kvm_arch_register_noncoherent_dma().

I think this last sentence is alluding to it, but I wish the user-visible
change to VFIO_DMA_CC_IOMMU on non-x86 were more explicit. Perhaps for
the last sentence:

  No other archs implement kvm_arch_register_noncoherent_dma(), nor are
  there any other known consumers of VFIO_DMA_CC_IOMMU that might be
  affected by the user-visible result change on non-x86 archs.

Otherwise,

Acked-by: Alex Williamson <alex.williamson@xxxxxxxxxx>

(Two illustrative sketches of the resulting flow are appended after the
quoted patch.)

>
> Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
> ---
>  drivers/iommu/intel/iommu.c     |  7 ++-----
>  drivers/vfio/vfio_iommu_type1.c | 30 +++++++++++++++++++-----------
>  include/linux/intel-iommu.h     |  1 -
>  3 files changed, 21 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
> index f08611a6cc4799..8f3674e997df06 100644
> --- a/drivers/iommu/intel/iommu.c
> +++ b/drivers/iommu/intel/iommu.c
> @@ -641,7 +641,6 @@ static unsigned long domain_super_pgsize_bitmap(struct dmar_domain *domain)
>  static void domain_update_iommu_cap(struct dmar_domain *domain)
>  {
>  	domain_update_iommu_coherency(domain);
> -	domain->iommu_snooping = domain_update_iommu_snooping(NULL);
>  	domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
>  
>  	/*
> @@ -4283,7 +4282,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
>  	domain->agaw = width_to_agaw(adjust_width);
>  
>  	domain->iommu_coherency = false;
> -	domain->iommu_snooping = false;
>  	domain->iommu_superpage = 0;
>  	domain->max_addr = 0;
>  
> @@ -4422,8 +4420,7 @@ static int intel_iommu_map(struct iommu_domain *domain,
>  		prot |= DMA_PTE_READ;
>  	if (iommu_prot & IOMMU_WRITE)
>  		prot |= DMA_PTE_WRITE;
> -	if (((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) ||
> -	    dmar_domain->enforce_no_snoop)
> +	if (dmar_domain->enforce_no_snoop)
>  		prot |= DMA_PTE_SNP;
>  
>  	max_addr = iova + size;
> @@ -4550,7 +4547,7 @@ static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain)
>  {
>  	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
>  
> -	if (!dmar_domain->iommu_snooping)
> +	if (!domain_update_iommu_snooping(NULL))
>  		return false;
>  	dmar_domain->enforce_no_snoop = true;
>  	return true;
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 9394aa9444c10c..c13b9290e35759 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -84,8 +84,8 @@ struct vfio_domain {
>  	struct iommu_domain	*domain;
>  	struct list_head	next;
>  	struct list_head	group_list;
> -	int			prot;		/* IOMMU_CACHE */
> -	bool			fgsp;		/* Fine-grained super pages */
> +	bool			fgsp : 1;	/* Fine-grained super pages */
> +	bool			enforce_cache_coherency : 1;
>  };
>  
>  struct vfio_dma {
> @@ -1461,7 +1461,7 @@ static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova,
>  
>  	list_for_each_entry(d, &iommu->domain_list, next) {
>  		ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT,
> -				npage << PAGE_SHIFT, prot | d->prot);
> +				npage << PAGE_SHIFT, prot | IOMMU_CACHE);
>  		if (ret)
>  			goto unwind;
>  
> @@ -1771,7 +1771,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
>  			}
>  
>  			ret = iommu_map(domain->domain, iova, phys,
> -					size, dma->prot | domain->prot);
> +					size, dma->prot | IOMMU_CACHE);
>  			if (ret) {
>  				if (!dma->iommu_mapped) {
>  					vfio_unpin_pages_remote(dma, iova,
> @@ -1859,7 +1859,7 @@ static void vfio_test_domain_fgsp(struct vfio_domain *domain)
>  		return;
>  
>  	ret = iommu_map(domain->domain, 0, page_to_phys(pages), PAGE_SIZE * 2,
> -			IOMMU_READ | IOMMU_WRITE | domain->prot);
> +			IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE);
>  	if (!ret) {
>  		size_t unmapped = iommu_unmap(domain->domain, 0, PAGE_SIZE);
>  
> @@ -2267,8 +2267,15 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
>  		goto out_detach;
>  	}
>  
> -	if (iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
> -		domain->prot |= IOMMU_CACHE;
> +	/*
> +	 * If the IOMMU can block non-coherent operations (ie PCIe TLPs with
> +	 * no-snoop set) then VFIO always turns this feature on because on Intel
> +	 * platforms it optimizes KVM to disable wbinvd emulation.
> +	 */
> +	if (domain->domain->ops->enforce_cache_coherency)
> +		domain->enforce_cache_coherency =
> +			domain->domain->ops->enforce_cache_coherency(
> +				domain->domain);
>  
>  	/*
>  	 * Try to match an existing compatible domain.  We don't want to
> @@ -2279,7 +2286,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
>  	 */
>  	list_for_each_entry(d, &iommu->domain_list, next) {
>  		if (d->domain->ops == domain->domain->ops &&
> -		    d->prot == domain->prot) {
> +		    d->enforce_cache_coherency ==
> +			    domain->enforce_cache_coherency) {
>  			iommu_detach_group(domain->domain, group->iommu_group);
>  			if (!iommu_attach_group(d->domain,
>  						group->iommu_group)) {
> @@ -2611,14 +2619,14 @@ static void vfio_iommu_type1_release(void *iommu_data)
>  	kfree(iommu);
>  }
>  
> -static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
> +static int vfio_domains_have_enforce_cache_coherency(struct vfio_iommu *iommu)
>  {
>  	struct vfio_domain *domain;
>  	int ret = 1;
>  
>  	mutex_lock(&iommu->lock);
>  	list_for_each_entry(domain, &iommu->domain_list, next) {
> -		if (!(domain->prot & IOMMU_CACHE)) {
> +		if (!(domain->enforce_cache_coherency)) {
>  			ret = 0;
>  			break;
>  		}
> @@ -2641,7 +2649,7 @@ static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu,
>  	case VFIO_DMA_CC_IOMMU:
>  		if (!iommu)
>  			return 0;
> -		return vfio_domains_have_iommu_cache(iommu);
> +		return vfio_domains_have_enforce_cache_coherency(iommu);
>  	default:
>  		return 0;
>  	}
> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
> index 1f930c0c225d94..bc39f633efdf03 100644
> --- a/include/linux/intel-iommu.h
> +++ b/include/linux/intel-iommu.h
> @@ -539,7 +539,6 @@ struct dmar_domain {
>  
>  	u8 has_iotlb_device: 1;
>  	u8 iommu_coherency: 1;	/* indicate coherency of iommu access */
> -	u8 iommu_snooping: 1;	/* indicate snooping control feature */
>  	u8 enforce_no_snoop : 1;	/* Create IOPTEs with snoop control */
>  
>  	struct list_head devices;	/* all devices' list */
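
Unrelated to the ack, the two sketches promised above, for anyone trying
to follow the new flow. First the driver side: a minimal, hypothetical
implementation of the new op. Everything named "foo" and both struct
fields are invented for illustration; only the op's signature and its
contract (fail if the hardware can't block no-snoop, otherwise make all
future IOPTEs snoop-enforcing) come from this series:

  #include <linux/container_of.h>
  #include <linux/iommu.h>

  struct foo_domain {
  	struct iommu_domain domain;
  	bool hw_can_force_snoop;	/* hypothetical capability bit */
  	bool force_snooping;		/* new IOPTEs get a SNP-like bit */
  };

  static struct foo_domain *to_foo_domain(struct iommu_domain *dom)
  {
  	return container_of(dom, struct foo_domain, domain);
  }

  /* Candidate for foo's iommu_domain_ops.enforce_cache_coherency */
  static bool foo_enforce_cache_coherency(struct iommu_domain *domain)
  {
  	struct foo_domain *foo = to_foo_domain(domain);

  	/* Refuse if the hardware cannot block no-snoop TLPs */
  	if (!foo->hw_can_force_snoop)
  		return false;
  	/* Commit: every IOPTE created from now on enforces snooping */
  	foo->force_snooping = true;
  	return true;
  }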
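
Second, the userspace-visible side my comment is about.
VFIO_CHECK_EXTENSION with VFIO_DMA_CC_IOMMU is the existing type1 API;
the helper wrapped around it is just a sketch:

  #include <stdbool.h>
  #include <sys/ioctl.h>
  #include <linux/vfio.h>

  /*
   * Sketch: "container" is an fd on /dev/vfio/vfio with a group attached
   * and the type1 IOMMU selected.  Before this series the answer tracked
   * whether IOMMU_CACHE was set on every domain; after it, the answer is
   * whether every domain accepted enforce_cache_coherency().
   */
  static bool vfio_no_snoop_blocked(int container)
  {
  	return ioctl(container, VFIO_CHECK_EXTENSION,
  		     VFIO_DMA_CC_IOMMU) > 0;
  }

On x86 that result is what ultimately reaches
kvm_arch_register_noncoherent_dma() via the kvm-vfio device; on other
archs it only changes the ioctl's answer, which is the point I'd like
the changelog to spell out.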