Handles the guest faults in KVM by mapping in corresponding user pages
in the 2nd stage page tables.

Introduces new ARM-specific kernel memory types, PAGE_KVM_GUEST and
pgprot_guest variables used to map 2nd stage memory for KVM guests.

Signed-off-by: Christoffer Dall <c.dall@xxxxxxxxxxxxxxxxxxxxxx>
---
 arch/arm/include/asm/pgtable-3level.h |    9 +++
 arch/arm/include/asm/pgtable.h        |    4 +
 arch/arm/kvm/arm_mmu.c                |  107 ++++++++++++++++++++++++++++++++-
 arch/arm/mm/mmu.c                     |    3 +
 4 files changed, 121 insertions(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index a6261f5..d8c5c14 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -104,4 +104,13 @@
  */
 #define L_PGD_SWAPPER (_AT(pgdval_t, 1) << 55) /* swapper_pg_dir entry */
 
+/*
+ * 2-nd stage PTE definitions for LPAE.
+ */
+#define L_PTE2_READ (_AT(pteval_t, 1) << 6) /* HAP[0] */
+#define L_PTE2_WRITE (_AT(pteval_t, 1) << 7) /* HAP[1] */
+#define L_PTE2_NORM_WB (_AT(pteval_t, 3) << 4) /* MemAttr[3:2] */
+#define L_PTE2_INNER_WB (_AT(pteval_t, 3) << 2) /* MemAttr[1:0] */
+
+
 #endif /* _ASM_PGTABLE_3LEVEL_H */
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index db3b6e8..0e0ca21 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -76,6 +76,7 @@ extern void __pgd_error(const char *file, int line, pgd_t);
 
 extern pgprot_t pgprot_user;
 extern pgprot_t pgprot_kernel;
+extern pgprot_t pgprot_guest;
 
 #define _MOD_PROT(p, b) __pgprot(pgprot_val(p) | (b))
 
@@ -89,6 +90,9 @@ extern pgprot_t pgprot_kernel;
 #define PAGE_KERNEL _MOD_PROT(pgprot_kernel, L_PTE_XN)
 #define PAGE_KERNEL_EXEC pgprot_kernel
 #define PAGE_HYP _MOD_PROT(pgprot_kernel, L_PTE_USER)
+#define PAGE_KVM_GUEST _MOD_PROT(pgprot_guest, L_PTE2_READ | \
+				  L_PTE2_WRITE | L_PTE2_NORM_WB | \
+				  L_PTE2_INNER_WB)
 
 #define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN)
 #define __PAGE_SHARED __pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN)
diff --git a/arch/arm/kvm/arm_mmu.c b/arch/arm/kvm/arm_mmu.c
index 5af0a7c..6040aff 100644
--- a/arch/arm/kvm/arm_mmu.c
+++ b/arch/arm/kvm/arm_mmu.c
@@ -290,8 +290,111 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
 	kvm->arch.pgd = NULL;
 }
 
+static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+			  gfn_t gfn, struct kvm_memory_slot *memslot)
+{
+	pfn_t pfn;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte, new_pte;
+
+	pfn = gfn_to_pfn(vcpu->kvm, gfn);
+
+	if (is_error_pfn(pfn)) {
+		kvm_err(-EFAULT, "Guest gfn %u (0x%08lx) does not have "
+				"corresponding host mapping",
+				gfn, gfn << PAGE_SHIFT);
+		return -EFAULT;
+	}
+
+	/* Create 2nd stage page table mapping - Level 1 */
+	pgd = vcpu->kvm->arch.pgd + pgd_index(fault_ipa);
+	pud = pud_offset(pgd, fault_ipa);
+	if (pud_none(*pud)) {
+		pmd = pmd_alloc_one(NULL, fault_ipa);
+		if (!pmd) {
+			kvm_err(-ENOMEM, "Cannot allocate 2nd stage pmd");
+			return -ENOMEM;
+		}
+		pud_populate(NULL, pud, pmd);
+		pmd += pmd_index(fault_ipa);
+	} else
+		pmd = pmd_offset(pud, fault_ipa);
+
+	/* Create 2nd stage page table mapping - Level 2 */
+	if (pmd_none(*pmd)) {
+		pte = pte_alloc_one_kernel(NULL, fault_ipa);
+		if (!pte) {
+			kvm_err(-ENOMEM, "Cannot allocate 2nd stage pte");
+			return -ENOMEM;
+		}
+		pmd_populate_kernel(NULL, pmd, pte);
+		pte += pte_index(fault_ipa);
+	} else
+		pte = pte_offset_kernel(pmd, fault_ipa);
+
+	/* Create 2nd stage page table mapping - Level 3 */
+	new_pte = pfn_pte(pfn, PAGE_KVM_GUEST);
+	set_pte_ext(pte, new_pte, 0);
+
+	return 0;
+}
+
+#define HSR_ABT_FS (0x3f)
+#define HPFAR_MASK (~0xf)
+
+/**
+ * kvm_handle_guest_abort - handles all 2nd stage aborts
+ * @vcpu: the VCPU pointer
+ * @run: the kvm_run structure
+ *
+ * Any abort that gets to the host is almost guaranteed to be caused by a
+ * missing second stage translation table entry, which can mean that either the
+ * guest simply needs more memory and we must allocate an appropriate page or it
+ * can mean that the guest tried to access I/O memory, which is emulated by user
+ * space. The distinction is based on the IPA causing the fault and whether this
+ * memory region has been registered as standard RAM by user space.
+ */
 int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	KVMARM_NOT_IMPLEMENTED();
-	return -EINVAL;
+	unsigned long hsr_ec;
+	unsigned long fault_status;
+	phys_addr_t fault_ipa;
+	struct kvm_memory_slot *memslot = NULL;
+	bool is_iabt;
+	gfn_t gfn;
+
+	hsr_ec = vcpu->arch.hsr >> HSR_EC_SHIFT;
+	is_iabt = (hsr_ec == HSR_EC_IABT);
+
+	/* Check that the second stage fault is a translation fault */
+	fault_status = vcpu->arch.hsr & HSR_ABT_FS;
+	if ((fault_status & 0x3c) != 0x4) {
+		kvm_err(-EFAULT, "Unsupported fault status: %x",
+				fault_status & 0x3c);
+		return -EFAULT;
+	}
+
+	fault_ipa = ((phys_addr_t)vcpu->arch.hpfar & HPFAR_MASK) << 8;
+
+	gfn = fault_ipa >> PAGE_SHIFT;
+	if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+		if (is_iabt) {
+			kvm_err(-EFAULT, "Inst. abort on I/O address");
+			return -EFAULT;
+		}
+
+		kvm_msg("I/O address abort...");
+		KVMARM_NOT_IMPLEMENTED();
+		return -EINVAL;
+	}
+
+	memslot = gfn_to_memslot(vcpu->kvm, gfn);
+	if (!memslot->user_alloc) {
+		kvm_err(-EINVAL, "non user-alloc memslots not supported");
+		return -EINVAL;
+	}
+
+	return user_mem_abort(vcpu, fault_ipa, gfn, memslot);
 }
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 749475e..c025e65 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -55,9 +55,11 @@ static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
 static unsigned int ecc_mask __initdata = 0;
 pgprot_t pgprot_user;
 pgprot_t pgprot_kernel;
+pgprot_t pgprot_guest;
 
 EXPORT_SYMBOL(pgprot_user);
 EXPORT_SYMBOL(pgprot_kernel);
+EXPORT_SYMBOL(pgprot_guest);
 
 struct cachepolicy {
 	const char policy[16];
@@ -497,6 +499,7 @@ static void __init build_mem_type_table(void)
 	pgprot_user = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
 	pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
 				 L_PTE_DIRTY | kern_pgprot);
+	pgprot_guest = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG);
 
 	mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
 	mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html