[PATCH 2/2] KVM: Enable snooping control for supported hardware

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Memory aliases with different memory type is a problem for guest. For the guest
without assigned device, the memory type of guest memory would always been the
same as host(WB); but for the assigned device, some part of memory may be used
as DMA and then set to uncacheable memory type(UC/WC), which would be a conflict of
host memory type then be a potential issue.

Snooping control can guarantee the cache correctness of memory go through the
DMA engine of VT-d.

Signed-off-by: Sheng Yang <sheng@xxxxxxxxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h |    5 ++++-
 arch/x86/kvm/mmu.c              |   16 +++++-----------
 arch/x86/kvm/svm.c              |    4 ++--
 arch/x86/kvm/vmx.c              |   29 ++++++++++++++++++++++++++---
 virt/kvm/iommu.c                |   27 ++++++++++++++++++++++++---
 5 files changed, 61 insertions(+), 20 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ba6906f..b972889 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -395,6 +395,8 @@ struct kvm_arch{
 	struct list_head active_mmu_pages;
 	struct list_head assigned_dev_head;
 	struct iommu_domain *iommu_domain;
+#define KVM_IOMMU_CACHE_COHERENCY	0x1
+	int iommu_flags;
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
@@ -523,7 +525,7 @@ struct kvm_x86_ops {
 	int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 	int (*get_tdp_level)(void);
-	int (*get_mt_mask_shift)(void);
+	u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
 };
 
 extern struct kvm_x86_ops *kvm_x86_ops;
@@ -551,6 +553,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 			  const void *val, int bytes);
 int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
 		  gpa_t addr, unsigned long *ret);
+u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
 
 extern bool tdp_enabled;
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 55c6923..ea1c2aa 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1606,7 +1606,7 @@ static int get_mtrr_type(struct mtrr_state_type *mtrr_state,
 	return mtrr_state->def_type;
 }
 
-static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
+u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
 {
 	u8 mtrr;
 
@@ -1616,6 +1616,7 @@ static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
 		mtrr = MTRR_TYPE_WRBACK;
 	return mtrr;
 }
+EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type);
 
 static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 {
@@ -1688,16 +1689,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		spte |= shadow_user_mask;
 	if (largepage)
 		spte |= PT_PAGE_SIZE_MASK;
-	if (tdp_enabled) {
-		if (!kvm_is_mmio_pfn(pfn)) {
-			mt_mask = get_memory_type(vcpu, gfn) <<
-				kvm_x86_ops->get_mt_mask_shift();
-			mt_mask |= VMX_EPT_IGMT_BIT;
-		} else
-			mt_mask = MTRR_TYPE_UNCACHABLE <<
-				kvm_x86_ops->get_mt_mask_shift();
-		spte |= mt_mask;
-	}
+	if (tdp_enabled)
+		spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
+			kvm_is_mmio_pfn(pfn));
 
 	spte |= (u64)pfn << PAGE_SHIFT;
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 053f3c5..2bbe1de 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2617,7 +2617,7 @@ static int get_npt_level(void)
 #endif
 }
 
-static int svm_get_mt_mask_shift(void)
+static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 {
 	return 0;
 }
@@ -2678,7 +2678,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 
 	.set_tss_addr = svm_set_tss_addr,
 	.get_tdp_level = get_npt_level,
-	.get_mt_mask_shift = svm_get_mt_mask_shift,
+	.get_mt_mask = svm_get_mt_mask,
 };
 
 static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 85db8b2..db853b6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3678,9 +3678,32 @@ static int get_ept_level(void)
 	return VMX_EPT_DEFAULT_GAW + 1;
 }
 
-static int vmx_get_mt_mask_shift(void)
+static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 {
-	return VMX_EPT_MT_EPTE_SHIFT;
+	int ret;
+
+	/* For VT-d and EPT combination
+	 * 1. MMIO: always map as UC
+	 * 2. EPT with VT-d:
+	 *   a. VT-d with snooping control feature: snooping control feature of
+	 *	VT-d engine can guarantee the cache correctness. Just set it
+	 *	to WB to keep consistent with host. So the same as item 3.
+	 *   b. VT-d without snooping control feature: can't guarantee the
+	 *	result, try to trust guest.
+	 * 3. EPT without VT-d: always map as WB and set IGMT=1 to keep
+	 *    consistent with host MTRR
+	 */
+	if (is_mmio)
+		ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;
+	else if (vcpu->kvm->arch.iommu_domain &&
+		!(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY))
+		ret = kvm_get_guest_memory_type(vcpu, gfn) <<
+		      VMX_EPT_MT_EPTE_SHIFT;
+	else
+		ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT)
+			| VMX_EPT_IGMT_BIT;
+
+	return ret;
 }
 
 static struct kvm_x86_ops vmx_x86_ops = {
@@ -3737,7 +3760,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.interrupt_allowed = vmx_interrupt_allowed,
 	.set_tss_addr = vmx_set_tss_addr,
 	.get_tdp_level = get_ept_level,
-	.get_mt_mask_shift = vmx_get_mt_mask_shift,
+	.get_mt_mask = vmx_get_mt_mask,
 };
 
 static int __init vmx_init(void)
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 4c40375..1514758 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -39,11 +39,16 @@ int kvm_iommu_map_pages(struct kvm *kvm,
 	pfn_t pfn;
 	int i, r = 0;
 	struct iommu_domain *domain = kvm->arch.iommu_domain;
+	int flags;
 
 	/* check if iommu exists and in use */
 	if (!domain)
 		return 0;
 
+	flags = IOMMU_READ | IOMMU_WRITE;
+	if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)
+		flags |= IOMMU_CACHE;
+
 	for (i = 0; i < npages; i++) {
 		/* check if already mapped */
 		if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn)))
@@ -53,8 +58,7 @@ int kvm_iommu_map_pages(struct kvm *kvm,
 		r = iommu_map_range(domain,
 				    gfn_to_gpa(gfn),
 				    pfn_to_hpa(pfn),
-				    PAGE_SIZE,
-				    IOMMU_READ | IOMMU_WRITE);
+				    PAGE_SIZE, flags);
 		if (r) {
 			printk(KERN_ERR "kvm_iommu_map_address:"
 			       "iommu failed to map pfn=%lx\n", pfn);
@@ -88,7 +92,7 @@ int kvm_assign_device(struct kvm *kvm,
 {
 	struct pci_dev *pdev = NULL;
 	struct iommu_domain *domain = kvm->arch.iommu_domain;
-	int r;
+	int r, last_flags;
 
 	/* check if iommu exists and in use */
 	if (!domain)
@@ -107,12 +111,29 @@ int kvm_assign_device(struct kvm *kvm,
 		return r;
 	}
 
+	last_flags = kvm->arch.iommu_flags;
+	if (iommu_domain_has_cap(kvm->arch.iommu_domain,
+				 IOMMU_CAP_CACHE_COHERENCY))
+		kvm->arch.iommu_flags |= KVM_IOMMU_CACHE_COHERENCY;
+
+	/* Check if need to update IOMMU page table for guest memory */
+	if ((last_flags ^ kvm->arch.iommu_flags) ==
+			KVM_IOMMU_CACHE_COHERENCY) {
+		kvm_iommu_unmap_memslots(kvm);
+		r = kvm_iommu_map_memslots(kvm);
+		if (r)
+			goto out_unmap;
+	}
+
 	printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n",
 		assigned_dev->host_busnr,
 		PCI_SLOT(assigned_dev->host_devfn),
 		PCI_FUNC(assigned_dev->host_devfn));
 
 	return 0;
+out_unmap:
+	kvm_iommu_unmap_memslots(kvm);
+	return r;
 }
 
 int kvm_deassign_device(struct kvm *kvm,
-- 
1.5.4.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux