Memory aliases with different memory type is a problem for guest. For the guest without assigned device, the memory type of guest memory would always been the same as host(WB); but for the assigned device, some part of memory may be used as DMA and then set to uncacheable memory type(UC/WC), which would be a conflict of host memory type then be a potential issue. Snooping control can guarantee the cache correctness of memory go through the DMA engine of VT-d. Signed-off-by: Sheng Yang <sheng@xxxxxxxxxxxxxxx> --- arch/x86/include/asm/kvm_host.h | 5 ++++- arch/x86/kvm/mmu.c | 16 +++++----------- arch/x86/kvm/svm.c | 4 ++-- arch/x86/kvm/vmx.c | 29 ++++++++++++++++++++++++++--- virt/kvm/iommu.c | 27 ++++++++++++++++++++++++--- 5 files changed, 61 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ba6906f..b972889 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -395,6 +395,8 @@ struct kvm_arch{ struct list_head active_mmu_pages; struct list_head assigned_dev_head; struct iommu_domain *iommu_domain; +#define KVM_IOMMU_CACHE_COHERENCY 0x1 + int iommu_flags; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; @@ -523,7 +525,7 @@ struct kvm_x86_ops { int (*interrupt_allowed)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*get_tdp_level)(void); - int (*get_mt_mask_shift)(void); + u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); }; extern struct kvm_x86_ops *kvm_x86_ops; @@ -551,6 +553,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, const void *val, int bytes); int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, gpa_t addr, unsigned long *ret); +u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); extern bool tdp_enabled; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 55c6923..ea1c2aa 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1606,7 +1606,7 @@ static int get_mtrr_type(struct mtrr_state_type *mtrr_state, return mtrr_state->def_type; } -static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) +u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) { u8 mtrr; @@ -1616,6 +1616,7 @@ static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) mtrr = MTRR_TYPE_WRBACK; return mtrr; } +EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type); static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { @@ -1688,16 +1689,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, spte |= shadow_user_mask; if (largepage) spte |= PT_PAGE_SIZE_MASK; - if (tdp_enabled) { - if (!kvm_is_mmio_pfn(pfn)) { - mt_mask = get_memory_type(vcpu, gfn) << - kvm_x86_ops->get_mt_mask_shift(); - mt_mask |= VMX_EPT_IGMT_BIT; - } else - mt_mask = MTRR_TYPE_UNCACHABLE << - kvm_x86_ops->get_mt_mask_shift(); - spte |= mt_mask; - } + if (tdp_enabled) + spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, + kvm_is_mmio_pfn(pfn)); spte |= (u64)pfn << PAGE_SHIFT; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 053f3c5..2bbe1de 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2617,7 +2617,7 @@ static int get_npt_level(void) #endif } -static int svm_get_mt_mask_shift(void) +static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) { return 0; } @@ -2678,7 +2678,7 @@ static struct kvm_x86_ops svm_x86_ops = { .set_tss_addr = svm_set_tss_addr, .get_tdp_level = get_npt_level, - .get_mt_mask_shift = svm_get_mt_mask_shift, + .get_mt_mask = svm_get_mt_mask, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 85db8b2..db853b6 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3678,9 +3678,32 @@ static int get_ept_level(void) return VMX_EPT_DEFAULT_GAW + 1; } -static int vmx_get_mt_mask_shift(void) +static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) { - return VMX_EPT_MT_EPTE_SHIFT; + int ret; + + /* For VT-d and EPT combination + * 1. MMIO: always map as UC + * 2. EPT with VT-d: + * a. VT-d with snooping control feature: snooping control feature of + * VT-d engine can guarantee the cache correctness. Just set it + * to WB to keep consistent with host. So the same as item 3. + * b. VT-d without snooping control feature: can't guarantee the + * result, try to trust guest. + * 3. EPT without VT-d: always map as WB and set IGMT=1 to keep + * consistent with host MTRR + */ + if (is_mmio) + ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT; + else if (vcpu->kvm->arch.iommu_domain && + !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)) + ret = kvm_get_guest_memory_type(vcpu, gfn) << + VMX_EPT_MT_EPTE_SHIFT; + else + ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) + | VMX_EPT_IGMT_BIT; + + return ret; } static struct kvm_x86_ops vmx_x86_ops = { @@ -3737,7 +3760,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .interrupt_allowed = vmx_interrupt_allowed, .set_tss_addr = vmx_set_tss_addr, .get_tdp_level = get_ept_level, - .get_mt_mask_shift = vmx_get_mt_mask_shift, + .get_mt_mask = vmx_get_mt_mask, }; static int __init vmx_init(void) diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index 4c40375..1514758 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -39,11 +39,16 @@ int kvm_iommu_map_pages(struct kvm *kvm, pfn_t pfn; int i, r = 0; struct iommu_domain *domain = kvm->arch.iommu_domain; + int flags; /* check if iommu exists and in use */ if (!domain) return 0; + flags = IOMMU_READ | IOMMU_WRITE; + if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY) + flags |= IOMMU_CACHE; + for (i = 0; i < npages; i++) { /* check if already mapped */ if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) @@ -53,8 +58,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, r = iommu_map_range(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn), - PAGE_SIZE, - IOMMU_READ | IOMMU_WRITE); + PAGE_SIZE, flags); if (r) { printk(KERN_ERR "kvm_iommu_map_address:" "iommu failed to map pfn=%lx\n", pfn); @@ -88,7 +92,7 @@ int kvm_assign_device(struct kvm *kvm, { struct pci_dev *pdev = NULL; struct iommu_domain *domain = kvm->arch.iommu_domain; - int r; + int r, last_flags; /* check if iommu exists and in use */ if (!domain) @@ -107,12 +111,29 @@ int kvm_assign_device(struct kvm *kvm, return r; } + last_flags = kvm->arch.iommu_flags; + if (iommu_domain_has_cap(kvm->arch.iommu_domain, + IOMMU_CAP_CACHE_COHERENCY)) + kvm->arch.iommu_flags |= KVM_IOMMU_CACHE_COHERENCY; + + /* Check if need to update IOMMU page table for guest memory */ + if ((last_flags ^ kvm->arch.iommu_flags) == + KVM_IOMMU_CACHE_COHERENCY) { + kvm_iommu_unmap_memslots(kvm); + r = kvm_iommu_map_memslots(kvm); + if (r) + goto out_unmap; + } + printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n", assigned_dev->host_busnr, PCI_SLOT(assigned_dev->host_devfn), PCI_FUNC(assigned_dev->host_devfn)); return 0; +out_unmap: + kvm_iommu_unmap_memslots(kvm); + return r; } int kvm_deassign_device(struct kvm *kvm, -- 1.5.4.5 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html