Flush the CPU cache on DMA pages before mapping them into the first
non-coherent domain (a domain that does not enforce cache coherency, i.e.
CPU caches are not force-snooped) and after unmapping them from the last
domain.

Devices attached to non-coherent domains can execute non-coherent DMAs
(DMAs that lack CPU cache snooping) to access physical memory with CPU
caches bypassed.

Such a scenario could be exploited by a malicious guest, allowing it to
access stale host data in memory rather than the data initialized by the
host (e.g., zeros) in the cache, thus posing a risk of an information
leakage attack.

Furthermore, the host kernel (e.g. a ksm thread) might encounter
inconsistent data between the CPU cache and memory (left by a malicious
guest) after a page is unpinned for DMA but before it's recycled.

Therefore, the CPU cache must be flushed before a page becomes accessible
to non-coherent DMAs and after the page becomes inaccessible to
non-coherent DMAs.

However, the CPU cache is not flushed immediately when the page is
unmapped from the last non-coherent domain. Instead, the flushing is
performed lazily, right before the page is unpinned.
Take the following example to illustrate the process. The CPU cache is
flushed right before step 2 and step 5.
1. A page is mapped into a coherent domain.
2. The page is mapped into a non-coherent domain.
3. The page is unmapped from the non-coherent domain, e.g. due to
   hot-unplug.
4. The page is unmapped from the coherent domain.
5. The page is unpinned.

Reasons for adopting this lazy flushing design include:
- There are several unmap paths but only one unpin path. Flushing lazily
  before unpin wipes out the inconsistency between the cache and physical
  memory before a page becomes globally visible, and produces code that is
  simpler, more maintainable and easier to backport.
- It avoids dividing a large unmap range into several smaller ones or
  allocating additional memory to hold the IOVA-to-HPA relationship.

Reported-by: Jason Gunthorpe <jgg@xxxxxxxxxx> Closes: https://lore.kernel.org/lkml/20240109002220.GA439767@xxxxxxxxxx Fixes: 73fa0d10d077 ("vfio: Type1 IOMMU implementation") Cc: Alex Williamson <alex.williamson@xxxxxxxxxx> Cc: Jason Gunthorpe <jgg@xxxxxxxxxx> Cc: Kevin Tian <kevin.tian@xxxxxxxxx> Signed-off-by: Yan Zhao <yan.y.zhao@xxxxxxxxx> --- drivers/vfio/vfio_iommu_type1.c | 51 +++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index b5c15fe8f9fc..ce873f4220bf 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -74,6 +74,7 @@ struct vfio_iommu { bool v2; bool nesting; bool dirty_page_tracking; + bool has_noncoherent_domain; struct list_head emulated_iommu_groups; }; @@ -99,6 +100,7 @@ struct vfio_dma { unsigned long *bitmap; struct mm_struct *mm; size_t locked_vm; + bool cache_flush_required; /* For noncoherent domain */ }; struct vfio_batch { @@ -716,6 +718,9 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova, long unlocked = 0, locked = 0; long i; + if (dma->cache_flush_required) + arch_clean_nonsnoop_dma(pfn << PAGE_SHIFT, npage << PAGE_SHIFT); + for (i = 0; i < npage; i++, iova += PAGE_SIZE) { if (put_pfn(pfn++, dma->prot)) { unlocked++; @@ -1099,6 +1104,8 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, &iotlb_gather); } + dma->cache_flush_required = false; + if (do_accounting) { vfio_lock_acct(dma, -unlocked, true); return 0; @@ -1120,6 +1127,21 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma) iommu->dma_avail++; } +static void vfio_update_noncoherent_domain_state(struct vfio_iommu *iommu) +{ + struct vfio_domain *domain; + bool has_noncoherent = false; + + list_for_each_entry(domain, &iommu->domain_list, next) { + if (domain->enforce_cache_coherency) + continue; + + has_noncoherent = true; + break; + } + iommu->has_noncoherent_domain = 
has_noncoherent; +} + static void vfio_update_pgsize_bitmap(struct vfio_iommu *iommu) { struct vfio_domain *domain; @@ -1455,6 +1477,12 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma, vfio_batch_init(&batch); + /* + * Record necessity to flush CPU cache to make sure CPU cache is flushed + * for both pin & map and unmap & unpin (for unwind) paths. + */ + dma->cache_flush_required = iommu->has_noncoherent_domain; + while (size) { /* Pin a contiguous chunk of memory */ npage = vfio_pin_pages_remote(dma, vaddr + dma->size, @@ -1466,6 +1494,10 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma, break; } + if (dma->cache_flush_required) + arch_clean_nonsnoop_dma(pfn << PAGE_SHIFT, + npage << PAGE_SHIFT); + /* Map it! */ ret = vfio_iommu_map(iommu, iova + dma->size, pfn, npage, dma->prot); @@ -1683,9 +1715,14 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, for (; n; n = rb_next(n)) { struct vfio_dma *dma; dma_addr_t iova; + bool cache_flush_required; dma = rb_entry(n, struct vfio_dma, node); iova = dma->iova; + cache_flush_required = !domain->enforce_cache_coherency && + !dma->cache_flush_required; + if (cache_flush_required) + dma->cache_flush_required = true; while (iova < dma->iova + dma->size) { phys_addr_t phys; @@ -1737,6 +1774,9 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, size = npage << PAGE_SHIFT; } + if (cache_flush_required) + arch_clean_nonsnoop_dma(phys, size); + ret = iommu_map(domain->domain, iova, phys, size, dma->prot | IOMMU_CACHE, GFP_KERNEL_ACCOUNT); @@ -1801,6 +1841,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, vfio_unpin_pages_remote(dma, iova, phys >> PAGE_SHIFT, size >> PAGE_SHIFT, true); } + dma->cache_flush_required = false; } vfio_batch_fini(&batch); @@ -1828,6 +1869,9 @@ static void vfio_test_domain_fgsp(struct vfio_domain *domain, struct list_head * if (!pages) return; + if (!domain->enforce_cache_coherency) + 
arch_clean_nonsnoop_dma(page_to_phys(pages), PAGE_SIZE * 2); + list_for_each_entry(region, regions, list) { start = ALIGN(region->start, PAGE_SIZE * 2); if (start >= region->end || (region->end - start < PAGE_SIZE * 2)) @@ -1847,6 +1891,9 @@ static void vfio_test_domain_fgsp(struct vfio_domain *domain, struct list_head * break; } + if (!domain->enforce_cache_coherency) + arch_clean_nonsnoop_dma(page_to_phys(pages), PAGE_SIZE * 2); + __free_pages(pages, order); } @@ -2308,6 +2355,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, list_add(&domain->next, &iommu->domain_list); vfio_update_pgsize_bitmap(iommu); + if (!domain->enforce_cache_coherency) + vfio_update_noncoherent_domain_state(iommu); done: /* Delete the old one and insert new iova list */ vfio_iommu_iova_insert_copy(iommu, &iova_copy); @@ -2508,6 +2557,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, } iommu_domain_free(domain->domain); list_del(&domain->next); + if (!domain->enforce_cache_coherency) + vfio_update_noncoherent_domain_state(iommu); kfree(domain); vfio_iommu_aper_expand(iommu, &iova_copy); vfio_update_pgsize_bitmap(iommu); -- 2.17.1