Introduce and export interface arch_clean_nonsnoop_dma() to flush CPU
caches for memory involved in non-coherent DMAs (DMAs that lack CPU
cache snooping).

When the IOMMU does not enforce cache coherency, devices are allowed to
perform non-coherent DMAs. This poses a risk of information leakage when
a device is assigned to a VM: a malicious guest could retrieve stale
host data through non-coherent DMA reads to physical memory whose
host-initialized contents (e.g. zeros) still reside in the CPU cache.
Additionally, the host kernel (e.g. a KSM kthread) may read inconsistent
data from CPU cache/memory (left behind by a malicious guest) after a
page is unpinned for non-coherent DMA but before it is freed.

Therefore, VFIO/IOMMUFD must initiate a CPU cache flush for pages
involved in non-coherent DMAs prior to or following their mapping or
unmapping to or from the IOMMU.

The new interface accepts a contiguous physical address range as input
and flushes CPU caches in an architecture-specific way for VFIO/IOMMUFD
(currently implemented for x86 only).

CLFLUSH on MMIO ranges is generally undesired on x86 and can even cause
an MCE on certain platforms (e.g. executing CLFLUSH on the VGA range
0xA0000-0xBFFFF causes an MCE on some platforms). Meanwhile, some MMIO
ranges are cacheable and do demand CLFLUSH (e.g. certain MMIO ranges for
PMEM). Hence, the host PAT/MTRR is consulted to identify uncacheable
memory.

This implementation always performs CLFLUSH on "pfn_valid() &&
!reserved" pages (since they cannot be MMIO). For reserved or
!pfn_valid() PFNs, it checks the host PAT/MTRR to skip physical ranges
that are uncacheable on the host and performs CLFLUSH on the remaining
cacheable ranges.

Cc: Alex Williamson <alex.williamson@xxxxxxxxxx>
Cc: Jason Gunthorpe <jgg@xxxxxxxxxx>
Cc: Kevin Tian <kevin.tian@xxxxxxxxx>
Suggested-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
Signed-off-by: Yan Zhao <yan.y.zhao@xxxxxxxxx>
---
 arch/x86/include/asm/cacheflush.h |  3 ++
 arch/x86/mm/pat/set_memory.c      | 88 +++++++++++++++++++++++++++++++
 include/linux/cacheflush.h        |  6 +++
 3 files changed, 97 insertions(+)

diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index b192d917a6d0..b63607994285 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -10,4 +10,7 @@
 
 void clflush_cache_range(void *addr, unsigned int size);
 
+void arch_clean_nonsnoop_dma(phys_addr_t phys, size_t length);
+#define arch_clean_nonsnoop_dma arch_clean_nonsnoop_dma
+
 #endif /* _ASM_X86_CACHEFLUSH_H */
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 80c9037ffadf..7ff08ad20369 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -34,6 +34,7 @@
 #include <asm/memtype.h>
 #include <asm/hyperv-tlfs.h>
 #include <asm/mshyperv.h>
+#include <asm/mtrr.h>
 
 #include "../mm_internal.h"
 
@@ -349,6 +350,93 @@ void arch_invalidate_pmem(void *addr, size_t size)
 EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
 #endif
 
+/*
+ * Flush a pfn_valid() and !PageReserved() page
+ */
+static void clflush_page(struct page *page)
+{
+	const int size = boot_cpu_data.x86_clflush_size;
+	unsigned int i;
+	void *va;
+
+	va = kmap_local_page(page);
+
+	/* CLFLUSHOPT is unordered and requires full memory barrier */
+	mb();
+	for (i = 0; i < PAGE_SIZE; i += size)
+		clflushopt(va + i);
+	/* CLFLUSHOPT is unordered and requires full memory barrier */
+	mb();
+
+	kunmap_local(va);
+}
+
+/*
+ * Flush a reserved page or a !pfn_valid() PFN.
+ * The flush is skipped if the PFN is accessed in an uncacheable type, i.e.
+ * - PAT type is UC/UC-/WC when PAT is enabled
+ * - MTRR type is UC/WC/WT/WP when PAT is not enabled
+ *   (no need to do CLFLUSH even though WT/WP are cacheable).
+ */
+static void clflush_reserved_or_invalid_pfn(unsigned long pfn)
+{
+	const int size = boot_cpu_data.x86_clflush_size;
+	unsigned int i;
+	void *va;
+
+	if (!pat_enabled()) {
+		u64 start = PFN_PHYS(pfn), end = start + PAGE_SIZE;
+		u8 mtrr_type, uniform;
+
+		mtrr_type = mtrr_type_lookup(start, end, &uniform);
+		if (mtrr_type != MTRR_TYPE_WRBACK)
+			return;
+	} else if (pat_pfn_immune_to_uc_mtrr(pfn)) {
+		return;
+	}
+
+	va = memremap(pfn << PAGE_SHIFT, PAGE_SIZE, MEMREMAP_WB);
+	if (!va)
+		return;
+
+	/* CLFLUSHOPT is unordered and requires full memory barrier */
+	mb();
+	for (i = 0; i < PAGE_SIZE; i += size)
+		clflushopt(va + i);
+	/* CLFLUSHOPT is unordered and requires full memory barrier */
+	mb();
+
+	memunmap(va);
+}
+
+static inline void clflush_pfn(unsigned long pfn)
+{
+	if (pfn_valid(pfn) &&
+	    (!PageReserved(pfn_to_page(pfn)) || is_zero_pfn(pfn)))
+		return clflush_page(pfn_to_page(pfn));
+
+	clflush_reserved_or_invalid_pfn(pfn);
+}
+
+/**
+ * arch_clean_nonsnoop_dma - flush a cache range for non-coherent DMAs
+ *                           (DMAs that lack CPU cache snooping).
+ * @phys_addr:	physical address start
+ * @length:	number of bytes to flush
+ */
+void arch_clean_nonsnoop_dma(phys_addr_t phys_addr, size_t length)
+{
+	unsigned long nrpages, pfn;
+	unsigned long i;
+
+	pfn = PHYS_PFN(phys_addr);
+	nrpages = PAGE_ALIGN((phys_addr & ~PAGE_MASK) + length) >> PAGE_SHIFT;
+
+	for (i = 0; i < nrpages; i++, pfn++)
+		clflush_pfn(pfn);
+}
+EXPORT_SYMBOL_GPL(arch_clean_nonsnoop_dma);
+
 #ifdef CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION
 bool cpu_cache_has_invalidate_memregion(void)
 {
diff --git a/include/linux/cacheflush.h b/include/linux/cacheflush.h
index 55f297b2c23f..0bfc6551c6d3 100644
--- a/include/linux/cacheflush.h
+++ b/include/linux/cacheflush.h
@@ -26,4 +26,10 @@ static inline void flush_icache_pages(struct vm_area_struct *vma,
 
 #define flush_icache_page(vma, page) flush_icache_pages(vma, page, 1)
 
+#ifndef arch_clean_nonsnoop_dma
+static inline void arch_clean_nonsnoop_dma(phys_addr_t phys, size_t length)
+{
+}
+#endif
+
 #endif /* _LINUX_CACHEFLUSH_H */
-- 
2.17.1
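
Illustrative usage note (not part of this patch): the sketch below shows
how a caller such as VFIO or IOMMUFD might invoke the new helper around a
non-coherent DMA mapping, matching the changelog's "prior to or following
their mapping or unmapping" requirement. The vfio_dma_range structure, the
dma_is_coherent flag and the function name are hypothetical, introduced
only for this example.

#include <linux/cacheflush.h>
#include <linux/types.h>

/* Hypothetical descriptor for a pinned, physically contiguous range. */
struct vfio_dma_range {
	phys_addr_t phys;
	size_t len;
};

/*
 * Flush CPU caches for a range that is about to be mapped for, or was
 * just unpinned after, non-coherent DMA.
 */
static void flush_range_for_noncoherent_dma(const struct vfio_dma_range *range,
					    bool dma_is_coherent)
{
	/* No flush is needed when the IOMMU enforces cache coherency. */
	if (dma_is_coherent)
		return;

	/*
	 * Evict possibly stale or dirty cache lines covering the range.
	 * On architectures without an implementation this resolves to the
	 * empty stub in include/linux/cacheflush.h.
	 */
	arch_clean_nonsnoop_dma(range->phys, range->len);
}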