when passthrough some pcie device, such as gpus(including Nvidia and AMD),kvm will report:"Unsupported FSC: EC=0x24 xFSC=0x21 ESR_EL2=0x92000061" err.the main reason is vfio ioremap vga memory type by DEVICE_nGnRnE, and kvm setting memory type to PAGE_S2_DEVICE(DEVICE_nGnRE), but in guestos, all of device io memory type when ioremapping (including gpu driver TTM memory type) is setting to MT_NORMAL_NC. according to ARM64 stage1&stage2 conbining rules. memory type attributes combining rules: Normal-WB<Normal-WT<NormalNC<Device-GRE<Device-nGRE< DevicenGnRE<Device-nGnRnE Normal-WB is weakest,Device-nGnRnE is strongest. refferring to 'Arm Architecture Reference Manual Armv8, for Armv8-A architecture profile' pdf, chapter B2.8 refferring to 'ARM System Memory Management Unit Architecture Specification SMMU architecture version 3.0 and version 3.1' pdf, chapter 13.1.5 therefore, the I/O memory attribute of the VM is setting to DevicenGnRE maybe is a mistake. it causes all device memory accessing in the virtual machine must be aligned. To summarize: stage2 memory type cannot be stronger than stage1 in arm64 archtechture. Signed-off-by: xieming <xieming@xxxxxxxxxx> --- arch/arm/include/asm/kvm_mmu.h | 3 ++- arch/arm64/include/asm/kvm_mmu.h | 3 ++- arch/arm64/include/asm/memory.h | 4 +++- arch/arm64/include/asm/pgtable-prot.h | 2 +- drivers/vfio/pci/vfio_pci.c | 7 +++++++ virt/kvm/arm/mmu.c | 19 ++++++++++++++++--- virt/kvm/arm/vgic/vgic-v2.c | 2 +- 7 files changed, 32 insertions(+), 8 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 523c499e42db..5c7869d25b62 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -64,7 +64,8 @@ void stage2_unmap_vm(struct kvm *kvm); int kvm_alloc_stage2_pgd(struct kvm *kvm); void kvm_free_stage2_pgd(struct kvm *kvm); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, - phys_addr_t pa, unsigned long size, bool writable); + phys_addr_t pa, unsigned long size, + bool writable, bool writecombine); int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index b2558447c67d..3f98286c7498 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -158,7 +158,8 @@ void stage2_unmap_vm(struct kvm *kvm); int kvm_alloc_stage2_pgd(struct kvm *kvm); void kvm_free_stage2_pgd(struct kvm *kvm); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, - phys_addr_t pa, unsigned long size, bool writable); + phys_addr_t pa, unsigned long size, + bool writable, bool writecombine); int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run); diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 56562ff01076..22565cc8ec20 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -158,13 +158,15 @@ * Memory types for Stage-2 translation */ #define MT_S2_NORMAL 0xf +#define MT_S2_NORMAL_NC 0x5 #define MT_S2_DEVICE_nGnRE 0x1 /* * Memory types for Stage-2 translation when ID_AA64MMFR2_EL1.FWB is 0001 - * Stage-2 enforces Normal-WB and Device-nGnRE + * Stage-2 enforces Normal-WB and Device-nGnRE and Normal-NC */ #define MT_S2_FWB_NORMAL 6 +#define MT_S2_FWB_NORMAL_NC 5 #define MT_S2_FWB_DEVICE_nGnRE 1 #ifdef CONFIG_ARM64_4K_PAGES diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index af547be1779b..b94e93135fea 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -88,8 +88,8 @@ }) #define PAGE_S2 __pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(NORMAL) | PTE_S2_RDONLY | PAGE_S2_XN) +#define PAGE_S2_NC __pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(NORMAL_NC) | PTE_S2_RDONLY | PAGE_S2_XN) #define PAGE_S2_DEVICE __pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(DEVICE_nGnRE) | PTE_S2_RDONLY | PAGE_S2_XN) - #define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) /* shared+writable pages are clean by default, hence PTE_RDONLY|PTE_WRITE */ #define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 51b791c750f1..6f66efb71743 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -1452,7 +1452,14 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma) } vma->vm_private_data = vdev; +#ifdef CONFIG_ARM64 + if (vfio_pci_is_vga(pdev)) + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + else + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); +#else vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); +#endif vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff; /* diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 11103b75c596..a46a58696834 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -206,6 +206,17 @@ static inline void kvm_pgd_populate(pgd_t *pgdp, pud_t *pudp) dsb(ishst); } +/** + * is_vma_write_combine - check if VMA is mapped with writecombine or not + * Return true if VMA mapped with MT_NORMAL_NC otherwise fasle + */ +static inline bool is_vma_write_combine(struct vm_area_struct *vma) +{ + pteval_t pteval = pgprot_val(vma->vm_page_prot); + + return ((pteval & PTE_ATTRINDX_MASK) == PTE_ATTRINDX(MT_NORMAL_NC)); +} + /* * Unmapping vs dcache management: * @@ -1198,7 +1209,8 @@ static int stage2_pmdp_test_and_clear_young(pmd_t *pmd) * @size: The size of the mapping */ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, - phys_addr_t pa, unsigned long size, bool writable) + phys_addr_t pa, unsigned long size, + bool writable, bool writecombine) { phys_addr_t addr, end; int ret = 0; @@ -1209,7 +1221,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, pfn = __phys_to_pfn(pa); for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) { - pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE); + pte_t pte = pfn_pte(pfn, writecombine ? PAGE_S2_NC : PAGE_S2_DEVICE); if (writable) pte = kvm_s2pte_mkwrite(pte); @@ -2135,7 +2147,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, ret = kvm_phys_addr_ioremap(kvm, gpa, pa, vm_end - vm_start, - writable); + writable, + is_vma_write_combine(vma)); if (ret) break; } diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 91b14dfacd1d..bc9f5a9a1fd9 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -352,7 +352,7 @@ int vgic_v2_map_resources(struct kvm *kvm) if (!static_branch_unlikely(&vgic_v2_cpuif_trap)) { ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base, kvm_vgic_global_state.vcpu_base, - KVM_VGIC_V2_CPU_SIZE, true); + KVM_VGIC_V2_CPU_SIZE, true, false); if (ret) { kvm_err("Unable to remap VGIC CPU to VCPU\n"); goto out; -- 2.27.0 _______________________________________________ kvmarm mailing list kvmarm@xxxxxxxxxxxxxxxxxxxxx https://lists.cs.columbia.edu/mailman/listinfo/kvmarm