From: Suraj Jitindar Singh <sjitindarsingh@xxxxxxxxx> When a host (L0) page which is mapped into a (L1) guest is in turn mapped through to a nested (L2) guest we keep a reverse mapping (rmap) so that these mappings can be retrieved later. Whenever we create an entry in a shadow_pgtable for a nested guest we create a corresponding rmap entry and add it to the list for the L1 guest memslot at the index of the L1 guest page it maps. This means at the L1 guest memslot we end up with lists of rmaps as follows; memslot (L1) ----------------- | gfn = 0 | -> rmap -> rmap -> rmap -> NULL ----------------- | gfn = 1 | ----------------- | gfn = 2 | -> rmap -> NULL ----------------- | gfn = 3 | -> rmap -> rmap -> NULL ----------------- | gfn = 4 | ----------------- | gfn = n | ----------------- When we are notified of a host page being invalidated which has been mapped through to a (L1) guest, we can then walk the rmap list for that guest page, and find and invalidate all of the corresponding shadow_pgtable entries. Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@xxxxxxxxx> Signed-off-by: Paul Mackerras <paulus@xxxxxxxxxx> --- arch/powerpc/include/asm/kvm_book3s.h | 3 + arch/powerpc/include/asm/kvm_book3s_64.h | 17 +++++- arch/powerpc/kvm/book3s_64_mmu_radix.c | 45 ++++++++++----- arch/powerpc/kvm/book3s_hv.c | 1 + arch/powerpc/kvm/book3s_hv_nested.c | 98 +++++++++++++++++++++++++++++++- 5 files changed, 147 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 664e1fb..d849518c 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -196,6 +196,9 @@ extern int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr, int table_index, u64 *pte_ret_p); extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *gpte, bool data, bool iswrite); +extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa, + unsigned int shift, struct kvm_memory_slot *memslot, + unsigned int lpid); extern bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable, bool writing, unsigned long gpa, unsigned int lpid); diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 16c3a97..dadd40e 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -37,6 +37,13 @@ struct kvm_nested_guest { struct kvm_nested_guest *next; }; +/* Structure for a nested guest rmap entry */ +struct rmap_nested { + struct llist_node list; + unsigned int l1_lpid; /* L1's lpid of this nested guest */ + unsigned long n_gpa; /* nested guest physical address */ +}; + struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int lpid, bool create); void kvmhv_put_nested(struct kvm_nested_guest *gp); @@ -535,7 +542,15 @@ static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu) extern int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, unsigned long gpa, unsigned int level, - unsigned long mmu_seq, unsigned int lpid); + unsigned long mmu_seq, unsigned int lpid, + unsigned long *rmap, struct rmap_nested *n_rmap); +extern void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmap, + struct rmap_nested *n_rmap); +extern void kvmhv_remove_nest_rmap_range(struct kvm *kvm, + struct kvm_memory_slot *memslot, + unsigned long gpa, unsigned long hpa, + unsigned long nbytes); +extern void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free); #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 778ae87..52b2a57 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -256,28 +256,38 @@ static void kvmppc_pmd_free(pmd_t *pmdp) kmem_cache_free(kvm_pmd_cache, pmdp); } -void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, - unsigned long gpa, unsigned int shift, - struct kvm_memory_slot *memslot, +/* Called with kvm->mmu_lock held */ +void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa, + unsigned int shift, struct kvm_memory_slot *memslot, unsigned int lpid) { unsigned long old; + unsigned long gfn = gpa >> PAGE_SHIFT; + unsigned long page_size = PAGE_SIZE; + unsigned long hpa; old = kvmppc_radix_update_pte(kvm, pte, ~0UL, 0, gpa, shift); kvmppc_radix_tlbie_page(kvm, gpa, shift, lpid); - if ((old & _PAGE_DIRTY) && (lpid == kvm->arch.lpid)) { - unsigned long gfn = gpa >> PAGE_SHIFT; - unsigned long page_size = PAGE_SIZE; - if (shift) - page_size = 1ul << shift; + /* The following only applies to L1 entries */ + if (lpid != kvm->arch.lpid) + return; + + if (!memslot) { + memslot = gfn_to_memslot(kvm, gfn); if (!memslot) - memslot = gfn_to_memslot(kvm, gfn); - if (memslot && memslot->dirty_bitmap) { - kvmppc_update_dirty_map(memslot, gfn, page_size); - } + return; } + if (shift) + page_size = 1ul << shift; + + gpa &= ~(page_size - 1); + hpa = old & PTE_RPN_MASK; + kvmhv_remove_nest_rmap_range(kvm, memslot, gpa, hpa, page_size); + + if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap) + kvmppc_update_dirty_map(memslot, gfn, page_size); } /* @@ -433,7 +443,8 @@ static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud, int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, unsigned long gpa, unsigned int level, - unsigned long mmu_seq, unsigned int lpid) + unsigned long mmu_seq, unsigned int lpid, + unsigned long *rmap, struct rmap_nested *n_rmap) { pgd_t *pgd; pud_t *pud, *new_pud = NULL; @@ -512,6 +523,8 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, kvmppc_unmap_free_pud_entry_table(kvm, pud, gpa, lpid); } kvmppc_radix_set_pte_at(kvm, gpa, (pte_t *)pud, pte); + if (rmap && n_rmap) + kvmhv_insert_nest_rmap(kvm, rmap, n_rmap); ret = 0; goto out_unlock; } @@ -562,6 +575,8 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, kvmppc_unmap_free_pmd_entry_table(kvm, pmd, gpa, lpid); } kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte); + if (rmap && n_rmap) + kvmhv_insert_nest_rmap(kvm, rmap, n_rmap); ret = 0; goto out_unlock; } @@ -586,6 +601,8 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, goto out_unlock; } kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte); + if (rmap && n_rmap) + kvmhv_insert_nest_rmap(kvm, rmap, n_rmap); ret = 0; out_unlock: @@ -713,7 +730,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, /* Allocate space in the tree and write the PTE */ ret = kvmppc_create_pte(kvm, kvm->arch.pgtable, pte, gpa, level, - mmu_seq, kvm->arch.lpid); + mmu_seq, kvm->arch.lpid, NULL, NULL); if (inserted_pte) *inserted_pte = pte; if (levelp) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 3b78d97..80da231 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4277,6 +4277,7 @@ static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { if (!dont || free->arch.rmap != dont->arch.rmap) { + kvmhv_free_memslot_nest_rmap(free); vfree(free->arch.rmap); free->arch.rmap = NULL; } diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index af8066b..9a50feb 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -10,6 +10,7 @@ #include <linux/kernel.h> #include <linux/kvm_host.h> +#include <linux/llist.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> @@ -523,13 +524,96 @@ void kvmhv_put_nested(struct kvm_nested_guest *gp) kvmhv_release_nested(gp); } -struct kvm_nested_guest *kvmhv_find_nested(struct kvm *kvm, int lpid) +static struct kvm_nested_guest *kvmhv_find_nested(struct kvm *kvm, int lpid) { if (lpid > kvm->arch.max_nested_lpid) return NULL; return kvm->arch.nested_guests[lpid]; } +void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmap, + struct rmap_nested *n_rmap) +{ + llist_add(&n_rmap->list, (struct llist_head *) rmap); +} + +static void kvmhv_remove_nest_rmap(struct kvm *kvm, struct rmap_nested *n_rmap, + unsigned long hpa, unsigned long mask) +{ + struct kvm_nested_guest *gp; + unsigned int shift; + pte_t *ptep; + + gp = kvmhv_find_nested(kvm, n_rmap->l1_lpid); + if (!gp) + return; + + /* Find and invalidate the pte */ + ptep = __find_linux_pte(gp->shadow_pgtable, n_rmap->n_gpa, NULL, + &shift); + /* Don't spuriously invalidate ptes if the pfn has changed */ + if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa)) + kvmppc_unmap_pte(kvm, ptep, n_rmap->n_gpa, shift, NULL, + gp->shadow_lpid); +} + +static void kvmhv_remove_nest_rmap_list(struct kvm *kvm, unsigned long *rmap, + unsigned long hpa, unsigned long mask) +{ + struct rmap_nested *n_rmap, *next; + struct llist_node *entry = llist_del_all((struct llist_head *) rmap); + + if (!entry) /* List was empty */ + return; + + llist_for_each_entry_safe(n_rmap, next, entry, list) { + kvmhv_remove_nest_rmap(kvm, n_rmap, hpa, mask); + kfree(n_rmap); + } +} + +/* called with kvm->mmu_lock held */ +void kvmhv_remove_nest_rmap_range(struct kvm *kvm, + struct kvm_memory_slot *memslot, + unsigned long gpa, unsigned long hpa, + unsigned long nbytes) +{ + unsigned long gfn, end_gfn; + unsigned long addr_mask; + + if (!memslot) + return; + gfn = (gpa >> PAGE_SHIFT) - memslot->base_gfn; + end_gfn = gfn + (nbytes >> PAGE_SHIFT); + + addr_mask = PTE_RPN_MASK & ~(nbytes - 1); + hpa &= addr_mask; + + for (; gfn < end_gfn; gfn++) { + unsigned long *rmap = &memslot->arch.rmap[gfn]; + kvmhv_remove_nest_rmap_list(kvm, rmap, hpa, addr_mask); + } +} + +void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free) +{ + struct rmap_nested *n_rmap, *next; + struct llist_node *entry; + unsigned long *rmap; + unsigned long page; + + for (page = 0; page < free->npages; page++) { + rmap = &free->arch.rmap[page]; + entry = llist_del_all((struct llist_head *) rmap); + + if (!entry) + continue; + + llist_for_each_entry_safe(n_rmap, next, entry, list) + kfree(n_rmap); + } +} + static bool kvmhv_invalidate_shadow_pte(struct kvm_vcpu *vcpu, struct kvm_nested_guest *gp, long gpa, int *shift_ret) @@ -736,11 +820,13 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu, { struct kvm *kvm = vcpu->kvm; struct kvm_memory_slot *memslot; + struct rmap_nested *n_rmap; struct kvmppc_pte gpte; pte_t pte, *pte_p; unsigned long mmu_seq; unsigned long dsisr = vcpu->arch.fault_dsisr; unsigned long ea = vcpu->arch.fault_dar; + unsigned long *rmap; unsigned long n_gpa, gpa, gfn, perm = 0UL; unsigned int shift, l1_shift, level; bool writing = !!(dsisr & DSISR_ISSTORE); @@ -867,8 +953,16 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu, /* 4. Insert the pte into our shadow_pgtable */ + n_rmap = kzalloc(sizeof(*n_rmap), GFP_KERNEL); + if (!n_rmap) + return RESUME_GUEST; /* Let the guest try again */ + n_rmap->l1_lpid = gp->l1_lpid; + n_rmap->n_gpa = n_gpa; + rmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; ret = kvmppc_create_pte(kvm, gp->shadow_pgtable, pte, n_gpa, level, - mmu_seq, gp->shadow_lpid); + mmu_seq, gp->shadow_lpid, rmap, n_rmap); + if (ret) + kfree(n_rmap); if (ret == -EAGAIN) ret = RESUME_GUEST; /* Let the guest try again */ -- 2.7.4