[PATCH 3/5] x86/mm: Introduce and export interface arch_clean_nonsnoop_dma()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Introduce and export interface arch_clean_nonsnoop_dma() to flush CPU
caches for memory involved in non-coherent DMAs (DMAs that lack CPU cache
snooping).

When IOMMU does not enforce cache coherency, devices are allowed to perform
non-coherent DMAs. This scenario poses a risk of information leakage when
the device is assigned into a VM. Specifically, a malicious guest could
potentially retrieve stale host data through non-coherent DMA reads to
physical memory, with data initialized by host (e.g., zeros) still residing
in the cache.

Additionally, the host kernel (e.g. a ksm kthread) may read inconsistent
data from CPU cache/memory (left by a malicious guest) after a page is
unpinned for non-coherent DMA but before it's freed.

Therefore, VFIO/IOMMUFD must initiate a CPU cache flush for pages involved
in non-coherent DMAs prior to or following their mapping or unmapping to or
from the IOMMU.

Introduce and export an interface that accepts a contiguous physical
address range and flushes CPU caches in an architecture-specific way for
VFIO/IOMMUFD (currently x86 only).

CLFLUSH on MMIO in x86 is generally undesired and sometimes causes MCE
(e.g. executing CLFLUSH on the VGA range 0xA0000-0xBFFFF causes MCE on some
platforms). Meanwhile, some MMIO ranges are cacheable and do require
CLFLUSH (e.g. certain MMIO ranges for PMEM). Hence, host PAT/MTRR is
checked to identify uncacheable memory.

This implementation always performs CLFLUSH on "pfn_valid() && !reserved"
pages (since they cannot be MMIO).
For the reserved or !pfn_valid() cases, check host PAT/MTRR to skip
physical ranges that are uncacheable in the host and do CLFLUSH on the
remaining cacheable ranges.

Cc: Alex Williamson <alex.williamson@xxxxxxxxxx>
Cc: Jason Gunthorpe <jgg@xxxxxxxxxx>
Cc: Kevin Tian <kevin.tian@xxxxxxxxx>
Suggested-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
Signed-off-by: Yan Zhao <yan.y.zhao@xxxxxxxxx>
---
 arch/x86/include/asm/cacheflush.h |  3 ++
 arch/x86/mm/pat/set_memory.c      | 88 +++++++++++++++++++++++++++++++
 include/linux/cacheflush.h        |  6 +++
 3 files changed, 97 insertions(+)

diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index b192d917a6d0..b63607994285 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -10,4 +10,7 @@
 
 void clflush_cache_range(void *addr, unsigned int size);
 
+void arch_clean_nonsnoop_dma(phys_addr_t phys, size_t length);
+#define arch_clean_nonsnoop_dma arch_clean_nonsnoop_dma
+
 #endif /* _ASM_X86_CACHEFLUSH_H */
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 80c9037ffadf..7ff08ad20369 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -34,6 +34,7 @@
 #include <asm/memtype.h>
 #include <asm/hyperv-tlfs.h>
 #include <asm/mshyperv.h>
+#include <asm/mtrr.h>
 
 #include "../mm_internal.h"
 
@@ -349,6 +350,93 @@ void arch_invalidate_pmem(void *addr, size_t size)
 EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
 #endif
 
+/*
+ * Flush every cache line of a page that has a struct page, using a
+ * temporary local kernel mapping of that page. clflush_pfn() passes
+ * pfn_valid() pages that are either !PageReserved() or the zero page.
+ */
+static void clflush_page(struct page *page)
+{
+	const int line = boot_cpu_data.x86_clflush_size;
+	void *va = kmap_local_page(page);
+	void *end = va + PAGE_SIZE;
+	void *p;
+
+	/* CLFLUSHOPT is unordered: full memory barrier before and after */
+	mb();
+	for (p = va; p < end; p += line)
+		clflushopt(p);
+	mb();
+
+	kunmap_local(va);
+}
+
+/*
+ * Flush a reserved page or a !pfn_valid() PFN.
+ * The flush is skipped when the host maps the PFN with an uncacheable type:
+ * - PAT type is UC/UC-/WC when PAT is enabled
+ * - MTRR type is UC/WC/WT/WP when PAT is not enabled
+ *   (WT/WP are cacheable but still do not need CLFLUSH).
+ */
+static void clflush_reserved_or_invalid_pfn(unsigned long pfn)
+{
+	const int size = boot_cpu_data.x86_clflush_size;
+	unsigned int i;
+	void *va;
+
+	if (!pat_enabled()) {
+		u64 start = PFN_PHYS(pfn), end = start + PAGE_SIZE;
+		u8 mtrr_type, uniform;
+
+		mtrr_type = mtrr_type_lookup(start, end, &uniform);
+		if (mtrr_type != MTRR_TYPE_WRBACK)
+			return;
+	} else if (pat_pfn_immune_to_uc_mtrr(pfn)) {
+		return;
+	}
+
+	/* PFN_PHYS() widens before shifting, so PAE addresses >= 4G survive */
+	va = memremap(PFN_PHYS(pfn), PAGE_SIZE, MEMREMAP_WB);
+	if (!va)
+		return;
+
+	/* CLFLUSHOPT is unordered: full memory barrier before and after */
+	mb();
+	for (i = 0; i < PAGE_SIZE; i += size)
+		clflushopt(va + i);
+	mb();
+
+	memunmap(va);
+}
+
+static inline void clflush_pfn(unsigned long pfn)
+{
+	if (pfn_valid(pfn) &&
+	    (!PageReserved(pfn_to_page(pfn)) || is_zero_pfn(pfn)))
+		clflush_page(pfn_to_page(pfn));
+	else	/* other reserved pages and !pfn_valid() PFNs */
+		clflush_reserved_or_invalid_pfn(pfn);
+}
+
+/**
+ * arch_clean_nonsnoop_dma - flush CPU caches for a physical range used by
+ *                           non-coherent DMAs (DMAs without cache snooping).
+ * @phys_addr:	start of the physical range; need not be page aligned
+ * @length:	length of the range in bytes
+ *
+ * Each page overlapping the range is handed to clflush_pfn() as a whole.
+ */
+void arch_clean_nonsnoop_dma(phys_addr_t phys_addr, size_t length)
+{
+	unsigned long cur_pfn = PHYS_PFN(phys_addr);
+	phys_addr_t offset = phys_addr & ~PAGE_MASK;
+	unsigned long remaining = PAGE_ALIGN(offset + length) >> PAGE_SHIFT;
+
+	while (remaining--)
+		clflush_pfn(cur_pfn++);
+}
+EXPORT_SYMBOL_GPL(arch_clean_nonsnoop_dma);
+
 #ifdef CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION
 bool cpu_cache_has_invalidate_memregion(void)
 {
diff --git a/include/linux/cacheflush.h b/include/linux/cacheflush.h
index 55f297b2c23f..0bfc6551c6d3 100644
--- a/include/linux/cacheflush.h
+++ b/include/linux/cacheflush.h
@@ -26,4 +26,10 @@ static inline void flush_icache_pages(struct vm_area_struct *vma,
 
 #define flush_icache_page(vma, page)	flush_icache_pages(vma, page, 1)
 
+#ifndef arch_clean_nonsnoop_dma
+/* Fallback used when no arch_clean_nonsnoop_dma macro has been defined: */
+/* the call is a no-op and both arguments are ignored. */
+static inline void arch_clean_nonsnoop_dma(phys_addr_t phys, size_t length) { }
+#endif
+
 #endif /* _LINUX_CACHEFLUSH_H */
-- 
2.17.1





[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux