Handles guest faults in KVM by mapping the corresponding user pages into the 2nd stage page tables. Introduces a new ARM-specific kernel memory type, PAGE_KVM_GUEST, and the pgprot_guest variable, which are used to map 2nd stage memory for KVM guests. --- arch/arm/include/asm/pgtable-3level.h | 9 +++ arch/arm/include/asm/pgtable.h | 4 ++ arch/arm/kernel/asm-offsets.c | 3 + arch/arm/kvm/arm_interrupts.S | 14 +++++ arch/arm/kvm/arm_mmu.c | 86 +++++++++++++++++++++++++++++++++ arch/arm/mm/mmu.c | 3 + 6 files changed, 118 insertions(+), 1 deletions(-) diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 14a3e28..f90d120 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -103,4 +103,13 @@ */ #define L_PGD_SWAPPER (_AT(pgdval_t, 1) << 55) /* swapper_pg_dir entry */ +/* + * 2-nd stage PTE definitions for LPAE. + */ +#define L_PTE2_READ (_AT(pteval_t, 1) << 6) /* HAP[0] */ +#define L_PTE2_WRITE (_AT(pteval_t, 1) << 7) /* HAP[1] */ +#define L_PTE2_NORM_WB (_AT(pteval_t, 3) << 4) /* MemAttr[3:2] */ +#define L_PTE2_INNER_WB (_AT(pteval_t, 3) << 2) /* MemAttr[1:0] */ + + #endif /* _ASM_PGTABLE_3LEVEL_H */ diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 2906f35..c4e71ff 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -75,6 +75,7 @@ extern void __pgd_error(const char *file, int line, pgd_t); extern pgprot_t pgprot_user; extern pgprot_t pgprot_kernel; +extern pgprot_t pgprot_guest; #define _MOD_PROT(p, b) __pgprot(pgprot_val(p) | (b)) @@ -88,6 +89,9 @@ extern pgprot_t pgprot_kernel; #define PAGE_KERNEL _MOD_PROT(pgprot_kernel, L_PTE_XN) #define PAGE_KERNEL_EXEC pgprot_kernel #define PAGE_HYP _MOD_PROT(pgprot_kernel, L_PTE_USER) +#define PAGE_KVM_GUEST _MOD_PROT(pgprot_guest, L_PTE2_READ | \ + L_PTE2_WRITE | L_PTE2_NORM_WB | \ + L_PTE2_INNER_WB) #define __PAGE_NONE __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | L_PTE_XN) #define __PAGE_SHARED
__pgprot(_L_PTE_DEFAULT | L_PTE_USER | L_PTE_XN) diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 769fa97..9f485aa 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -139,6 +139,9 @@ int main(void) DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.regs.pc)); DEFINE(VCPU_CPSR, offsetof(struct kvm_vcpu, arch.regs.cpsr)); DEFINE(VCPU_HSR, offsetof(struct kvm_vcpu, arch.hsr)); + DEFINE(VCPU_HDFAR, offsetof(struct kvm_vcpu, arch.hdfar)); + DEFINE(VCPU_HIFAR, offsetof(struct kvm_vcpu, arch.hifar)); + DEFINE(VCPU_HPFAR, offsetof(struct kvm_vcpu, arch.hpfar)); DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); #endif return 0; diff --git a/arch/arm/kvm/arm_interrupts.S b/arch/arm/kvm/arm_interrupts.S index 6d3044c..689b337 100644 --- a/arch/arm/kvm/arm_interrupts.S +++ b/arch/arm/kvm/arm_interrupts.S @@ -483,7 +483,19 @@ guest_trap: stmia r1, {r3, r4, r5} sub r1, r1, #VCPU_USR_REG(0) - mov r0, #ARM_EXCEPTION_HVC + @ Check if we need the fault information + lsr r0, r0, #HSR_EC_SHIFT + cmp r0, #HSR_EC_IABT + beq 2f + cmp r0, #HSR_EC_DABT + beq 2f + b 1f +2: mrc p15, 4, r2, c6, c0, 0 @ HDFAR + mrc p15, 4, r3, c6, c0, 2 @ HIFAR + mrc p15, 4, r4, c6, c0, 4 @ HPFAR + add r5, r1, #VCPU_HDFAR + stmia r5, {r2, r3, r4} +1: mov r0, #ARM_EXCEPTION_HVC b __kvm_vcpu_return .align diff --git a/arch/arm/kvm/arm_mmu.c b/arch/arm/kvm/arm_mmu.c index 683f971..fe27e59 100644 --- a/arch/arm/kvm/arm_mmu.c +++ b/arch/arm/kvm/arm_mmu.c @@ -248,8 +248,94 @@ void kvm_free_stage2_pgd(struct kvm *kvm) KVMARM_NOT_IMPLEMENTED(); } +static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + gfn_t gfn, struct kvm_memory_slot *memslot) +{ + pfn_t pfn; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte, new_pte; + + pfn = gfn_to_pfn(vcpu->kvm, gfn); + + if (is_error_pfn(pfn)) { + kvm_err(-EFAULT, "Guest gfn %u (0x%08lx) does not have " + "corresponding host mapping", + gfn, gfn << PAGE_SHIFT); + return -EFAULT; + } + + /* Create 2nd stage 
page table mapping - Level 1 */ + pgd = vcpu->kvm->arch.pgd + pgd_index(fault_ipa); + if (pgd_none(*pgd)) { + pmd = pmd_alloc_one(NULL, fault_ipa); + if (!pmd) { + kvm_err(-ENOMEM, "Cannot allocate 2nd stage pmd"); + return -ENOMEM; + } + pgd_populate(NULL, pgd, pmd); + pmd += pmd_index(fault_ipa); + } else + pmd = pmd_offset(pgd, fault_ipa); + + /* Create 2nd stage page table mapping - Level 2 */ + if (pmd_none(*pmd)) { + pte = pte_alloc_one_kernel(NULL, fault_ipa); + if (!pte) { + kvm_err(-ENOMEM, "Cannot allocate 2nd stage pte"); + return -ENOMEM; + } + pmd_populate_kernel(NULL, pmd, pte); + pte += pte_index(fault_ipa); + } else + pte = pte_offset_kernel(pmd, fault_ipa); + + /* Create 2nd stage page table mapping - Level 3 */ + new_pte = pfn_pte(pfn, PAGE_KVM_GUEST); + set_pte_ext(pte, new_pte, 0); + + return 0; +} + +#define HSR_ABT_FS (0x3f) +#define HPFAR_MASK (~0xf) int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) { + unsigned long hsr_ec; + unsigned long fault_status; + phys_addr_t fault_ipa; + struct kvm_memory_slot *memslot = NULL; + bool is_iabt; + gfn_t gfn; + + hsr_ec = vcpu->arch.hsr >> HSR_EC_SHIFT; + is_iabt = (hsr_ec == HSR_EC_IABT); + + /* Check that the second stage fault is a translation fault */ + fault_status = vcpu->arch.hsr & HSR_ABT_FS; + if ((fault_status & 0x3c) != 0x4) { + kvm_err(-EFAULT, "Unsupported fault status: %x", + fault_status & 0x3c); + return -EFAULT; + } + + fault_ipa = ((phys_addr_t)vcpu->arch.hpfar & HPFAR_MASK) << 8; + + gfn = fault_ipa >> PAGE_SHIFT; + if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) + goto io_mem_abort; + + memslot = gfn_to_memslot(vcpu->kvm, gfn); + if (memslot->user_alloc) + return user_mem_abort(vcpu, fault_ipa, gfn, memslot); + +io_mem_abort: + if (is_iabt) { + kvm_err(-EFAULT, "Inst. 
abort on I/O address"); + return -EFAULT; + } + + kvm_msg("I/O address abort..."); KVMARM_NOT_IMPLEMENTED(); return -EINVAL; } diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index d1da559..c5cbcd3 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -55,9 +55,11 @@ static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK; static unsigned int ecc_mask __initdata = 0; pgprot_t pgprot_user; pgprot_t pgprot_kernel; +pgprot_t pgprot_guest; EXPORT_SYMBOL(pgprot_user); EXPORT_SYMBOL(pgprot_kernel); +EXPORT_SYMBOL(pgprot_guest); struct cachepolicy { const char policy[16]; @@ -497,6 +499,7 @@ static void __init build_mem_type_table(void) pgprot_user = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot); pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | kern_pgprot); + pgprot_guest = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG); mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask; mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask; -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html