Add the necessary infrastructure to handle MMU notifiers on KVM/ARM.

As we don't have shadow page tables, the implementation is actually very
simple. The only supported operation is kvm_unmap_hva(), where we remove
the HVA from the 2nd stage translation. All other hooks are NOPs.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/include/asm/kvm_asm.h  |    3 ++
 arch/arm/include/asm/kvm_host.h |   19 +++++++++++++++
 arch/arm/kvm/Kconfig            |    1 +
 arch/arm/kvm/interrupts.S       |   34 +++++++++++++++++++++++++++
 arch/arm/kvm/mmu.c              |   48 +++++++++++++++++++++++++++++++++-----
 5 files changed, 98 insertions(+), 7 deletions(-)

diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 69afdf3..c7cbe24 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -32,6 +32,7 @@
 #define SMCHYP_HVBAR_W 0xfffffff0

 #ifndef __ASSEMBLY__
+struct kvm;
 struct kvm_vcpu;

 extern char __kvm_hyp_init[];
@@ -42,6 +43,8 @@ extern char __kvm_hyp_vector[];
 extern char __kvm_hyp_code_start[];
 extern char __kvm_hyp_code_end[];

+extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
+
 extern void __kvm_flush_vm_context(void);
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index af22cee..06521b4 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -115,4 +115,23 @@ struct kvm_vcpu_stat {
 	u32 halt_wakeup;
 };

+#define KVM_ARCH_WANT_MMU_NOTIFIER
+struct kvm;
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+
+/* We do not have shadow page tables, hence the empty hooks */
+static inline int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	return 0;
+}
+
+static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	return 0;
+}
+
+static inline void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+}
+
 #endif /* __ARM_KVM_HOST_H__ */

diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index a458a55..edf150f 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -37,6 +37,7 @@ config KVM_ARM_HOST
 	depends on KVM
 	depends on MMU
 	depends on CPU_V7 && ARM_VIRT_EXT
+	select MMU_NOTIFIER
 	---help---
 	  Provides host support for ARM processors.

diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 8e427dc..ad6ae4e 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -35,6 +35,40 @@ __kvm_hyp_code_start:
 	.globl __kvm_hyp_code_start

 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@ Flush per-VMID TLBs
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+	/*
+	 * void __kvm_tlb_flush_vmid(struct kvm *kvm);
+	 *
+	 * We rely on the hardware to broadcast the TLB invalidation
+	 * to all CPUs inside the inner-shareable domain (which is
+	 * the case for all v7 implementations).
+	 * If we come across a non-IS SMP implementation, we'll have to
+	 * use an IPI based mechanism. Until then, we stick to the simple
+	 * hardware assisted version.
+	 */
+ENTRY(__kvm_tlb_flush_vmid)
+	hvc	#0			@ Switch to Hyp mode
+	push	{r2, r3}
+
+	ldrd	r2, r3, [r0, #KVM_VTTBR]
+	mcrr	p15, 6, r2, r3, c2	@ Write VTTBR
+	isb
+	mcr	p15, 0, r0, c8, c3, 0	@ TLBIALLIS
+	dsb
+	isb
+	mov	r2, #0
+	mov	r3, #0
+	mcrr	p15, 6, r2, r3, c2	@ Back to VMID #0
+	isb
+
+	pop	{r2, r3}
+	hvc	#0			@ Back to SVC
+	mov	pc, lr
+ENDPROC(__kvm_tlb_flush_vmid)
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 @ Flush TLBs and instruction caches of current CPU for all VMIDs
 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 7fed452..0fecb32 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -279,20 +279,22 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
 	kvm->arch.pgd = NULL;
 }

-static int __user_mem_abort(struct kvm *kvm, phys_addr_t addr, pfn_t pfn)
+static const pte_t null_pte;
+
+static int stage2_set_pte(struct kvm *kvm, phys_addr_t addr, const pte_t *new_pte)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
-	pte_t *pte, new_pte;
+	pte_t *pte;

 	/* Create 2nd stage page table mapping - Level 1 */
 	pgd = kvm->arch.pgd + pgd_index(addr);
 	pud = pud_offset(pgd, addr);
 	if (pud_none(*pud)) {
+		BUG_ON(new_pte == &null_pte);
 		pmd = pmd_alloc_one(NULL, addr);
 		if (!pmd) {
-			put_page(pfn_to_page(pfn));
 			kvm_err("Cannot allocate 2nd stage pmd\n");
 			return -ENOMEM;
 		}
@@ -303,9 +305,9 @@ static int __user_mem_abort(struct kvm *kvm, phys_addr_t addr, pfn_t pfn)

 	/* Create 2nd stage page table mapping - Level 2 */
 	if (pmd_none(*pmd)) {
+		BUG_ON(new_pte == &null_pte);
 		pte = pte_alloc_one_kernel(NULL, addr);
 		if (!pte) {
-			put_page(pfn_to_page(pfn));
 			kvm_err("Cannot allocate 2nd stage pte\n");
 			return -ENOMEM;
 		}
@@ -315,8 +317,7 @@ static int __user_mem_abort(struct kvm *kvm, phys_addr_t addr, pfn_t pfn)
 	pte = pte_offset_kernel(pmd, addr);

 	/* Create 2nd stage page table mapping - Level 3 */
-	new_pte = pfn_pte(pfn, PAGE_KVM_GUEST);
-	set_pte_ext(pte, new_pte, 0);
+	set_pte_ext(pte, *new_pte, 0);

 	return 0;
 }
@@ -324,6 +325,7 @@ static int __user_mem_abort(struct kvm *kvm, phys_addr_t addr, pfn_t pfn)
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  gfn_t gfn, struct kvm_memory_slot *memslot)
 {
+	pte_t new_pte;
 	pfn_t pfn;
 	int ret;

@@ -339,7 +341,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	}

 	mutex_lock(&vcpu->kvm->arch.pgd_mutex);
-	ret = __user_mem_abort(vcpu->kvm, fault_ipa, pfn);
+	new_pte = pfn_pte(pfn, PAGE_KVM_GUEST);
+	ret = stage2_set_pte(vcpu->kvm, fault_ipa, &new_pte);
+	if (ret)
+		put_page(pfn_to_page(pfn));
 	mutex_unlock(&vcpu->kvm->arch.pgd_mutex);

 	return ret;
@@ -547,3 +552,32 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)

 	return user_mem_abort(vcpu, fault_ipa, gfn, memslot);
 }
+
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	int needs_stage2_flush = 0;
+
+	slots = kvm_memslots(kvm);
+
+	/* we only care about the pages that the guest sees */
+	kvm_for_each_memslot(memslot, slots) {
+		unsigned long start = memslot->userspace_addr;
+		unsigned long end;
+
+		end = start + (memslot->npages << PAGE_SHIFT);
+		if (hva >= start && hva < end) {
+			gpa_t gpa_offset = hva - start;
+			gpa_t gpa = (memslot->base_gfn << PAGE_SHIFT) + gpa_offset;
+
+			stage2_set_pte(kvm, gpa, &null_pte);
+			needs_stage2_flush = 1;
+		}
+	}
+
+	if (needs_stage2_flush)
+		__kvm_tlb_flush_vmid(kvm);
+
+	return 0;
+}
-- 
1.7.7.1
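
A side note on the kvm_unmap_hva() walk above: the per-memslot hva-to-gpa
translation is just a bounds check followed by an offset rebase onto the
slot's base_gfn. A minimal standalone sketch of that arithmetic follows;
it is not kernel code, and EX_PAGE_SHIFT, struct ex_slot, ex_hva_to_gpa
and the sample addresses are assumptions made up for the example (plain
integer types stand in for gfn_t/gpa_t, 4K pages assumed).

/*
 * Illustration of the per-memslot hva->gpa translation performed by
 * kvm_unmap_hva(). All names and values here are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_SHIFT	12	/* assume 4K pages */

struct ex_slot {
	uint64_t userspace_addr;	/* HVA of the slot's first byte */
	uint64_t base_gfn;		/* guest frame number it maps to */
	uint64_t npages;		/* slot size in pages */
};

/* Return 1 and set *gpa if hva falls inside the slot, 0 otherwise. */
static int ex_hva_to_gpa(const struct ex_slot *s, uint64_t hva, uint64_t *gpa)
{
	uint64_t start = s->userspace_addr;
	uint64_t end = start + (s->npages << EX_PAGE_SHIFT);

	if (hva < start || hva >= end)
		return 0;
	*gpa = (s->base_gfn << EX_PAGE_SHIFT) + (hva - start);
	return 1;
}

int main(void)
{
	/* 1MB of guest RAM at IPA 0x100000, backed by HVA 0x40000000 */
	struct ex_slot s = { 0x40000000, 0x100000 >> EX_PAGE_SHIFT, 256 };
	uint64_t gpa;

	if (ex_hva_to_gpa(&s, 0x40003000, &gpa))
		printf("gpa = 0x%llx\n", (unsigned long long)gpa);
	return 0;
}

With the sample values, hva 0x40003000 resolves to gpa 0x103000, which is
the address kvm_unmap_hva() would then hand to stage2_set_pte() together
with null_pte to clear the stage-2 mapping.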