As per commit 178a6915434c ("KVM: arm64: nv: Unmap/flush shadow stage 2
page tables"), whenever pages mapped at L1 are unmapped, they are
invalidated from both the L1 S2-MMU and all active shadow/L2 S2-MMU
tables. Since there is no record of which shadow-S2 IPAs a given page
is mapped at, the invalidation requires a full S2-MMU page table walk
covering the complete address space allocated to an L2. This hurts
performance and can even trigger soft lockups when booting NV (L1 and
L2) guests with a large number of CPUs and a large amount of memory.

Add a lookup table recording the shadow IPA to canonical IPA mapping
whenever a page is mapped into any L2. When a page is unmapped, use
this lookup to unmap it from a shadow S2-MMU table only if it is
actually mapped there. This avoids unnecessarily long S2-MMU table
walks and invalidation of the complete address space.

Signed-off-by: Ganapatrao Kulkarni <gankulkarni@xxxxxxxxxxxxxxxxxxxxxx>
---
 arch/arm64/include/asm/kvm_emulate.h |   5 ++
 arch/arm64/include/asm/kvm_host.h    |  14 ++++
 arch/arm64/include/asm/kvm_nested.h  |   4 +
 arch/arm64/kvm/mmu.c                 |  19 ++++-
 arch/arm64/kvm/nested.c              | 113 +++++++++++++++++++++++++++
 5 files changed, 152 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 5173f8cf2904..f503b2eaedc4 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -656,4 +656,9 @@ static inline bool kvm_is_shadow_s2_fault(struct kvm_vcpu *vcpu)
 		       vcpu->arch.hw_mmu->nested_stage2_enabled);
 }
 
+static inline bool kvm_is_l1_using_shadow_s2(struct kvm_vcpu *vcpu)
+{
+	return (vcpu->arch.hw_mmu != &vcpu->kvm->arch.mmu);
+}
+
 #endif /* __ARM64_KVM_EMULATE_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 8da3c9a81ae3..f61c674c300a 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -144,6 +144,13 @@ struct kvm_vmid {
 	atomic64_t id;
 };
 
+struct mapipa_node {
+	struct rb_node node;
+	phys_addr_t ipa;
+	phys_addr_t shadow_ipa;
+	long size;
+};
+
 struct kvm_s2_mmu {
 	struct kvm_vmid vmid;
 
@@ -216,6 +223,13 @@ struct kvm_s2_mmu {
 	 * >0: Somebody is actively using this.
 	 */
 	atomic_t refcnt;
+
+	/*
+	 * For a Canonical IPA to Shadow IPA mapping.
+	 */
+	struct rb_root nested_mapipa_root;
+	rwlock_t mmu_lock;
+
 };
 
 static inline bool kvm_s2_mmu_valid(struct kvm_s2_mmu *mmu)
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index da7ebd2f6e24..c31a59a1fdc6 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -65,6 +65,9 @@ extern void kvm_init_nested(struct kvm *kvm);
 extern int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu);
 extern void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu);
 extern struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu);
+extern void add_shadow_ipa_map_node(
+		struct kvm_s2_mmu *mmu,
+		phys_addr_t ipa, phys_addr_t shadow_ipa, long size);
 
 union tlbi_info;
 
@@ -123,6 +126,7 @@ extern int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu,
 extern int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2);
 extern void kvm_nested_s2_wp(struct kvm *kvm);
 extern void kvm_nested_s2_unmap(struct kvm *kvm);
+extern void kvm_nested_s2_unmap_range(struct kvm *kvm, struct kvm_gfn_range *range);
 extern void kvm_nested_s2_flush(struct kvm *kvm);
 
 int handle_wfx_nested(struct kvm_vcpu *vcpu, bool is_wfe);
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 61bdd8798f83..3948681426a0 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1695,6 +1695,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 					     memcache,
 					     KVM_PGTABLE_WALK_HANDLE_FAULT |
 					     KVM_PGTABLE_WALK_SHARED);
+		if ((nested || kvm_is_l1_using_shadow_s2(vcpu)) && !ret) {
+			struct kvm_s2_mmu *shadow_s2_mmu;
+
+			ipa &= ~(vma_pagesize - 1);
+			shadow_s2_mmu = lookup_s2_mmu(vcpu);
+			add_shadow_ipa_map_node(shadow_s2_mmu, ipa, fault_ipa, vma_pagesize);
+		}
 	}
 
 	/* Mark the page dirty only if the fault is handled successfully */
@@ -1918,7 +1925,7 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 			     (range->end - range->start) << PAGE_SHIFT,
 			     range->may_block);
 
-	kvm_nested_s2_unmap(kvm);
+	kvm_nested_s2_unmap_range(kvm, range);
 
 	return false;
 }
@@ -1953,7 +1960,7 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 			       PAGE_SIZE, __pfn_to_phys(pfn),
 			       KVM_PGTABLE_PROT_R, NULL, 0);
 
-	kvm_nested_s2_unmap(kvm);
+	kvm_nested_s2_unmap_range(kvm, range);
 
 	return false;
 }
@@ -2223,12 +2230,18 @@ void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 				   struct kvm_memory_slot *slot)
 {
+	struct kvm_gfn_range range;
+
 	gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
 	phys_addr_t size = slot->npages << PAGE_SHIFT;
 
+	range.start = gpa;
+	range.end = gpa + size;
+	range.may_block = true;
+
 	write_lock(&kvm->mmu_lock);
 	kvm_unmap_stage2_range(&kvm->arch.mmu, gpa, size);
-	kvm_nested_s2_unmap(kvm);
+	kvm_nested_s2_unmap_range(kvm, &range);
 	write_unlock(&kvm->mmu_lock);
 }
 
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index f88d9213c6b3..888ec9fba4a0 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -565,6 +565,88 @@ void kvm_s2_mmu_iterate_by_vmid(struct kvm *kvm, u16 vmid,
 	write_unlock(&kvm->mmu_lock);
 }
 
+/*
+ * Create a node and add to lookup table, when a page is mapped to
+ * Canonical IPA and also mapped to Shadow IPA.
+ */
+void add_shadow_ipa_map_node(struct kvm_s2_mmu *mmu,
+			     phys_addr_t ipa,
+			     phys_addr_t shadow_ipa, long size)
+{
+	struct rb_root *ipa_root = &(mmu->nested_mapipa_root);
+	struct rb_node **node = &(ipa_root->rb_node), *parent = NULL;
+	struct mapipa_node *new;
+
+	new = kzalloc(sizeof(struct mapipa_node), GFP_KERNEL);
+	if (!new)
+		return;
+
+	new->shadow_ipa = shadow_ipa;
+	new->ipa = ipa;
+	new->size = size;
+
+	write_lock(&mmu->mmu_lock);
+
+	while (*node) {
+		struct mapipa_node *tmp;
+
+		tmp = container_of(*node, struct mapipa_node, node);
+		parent = *node;
+		if (new->ipa < tmp->ipa) {
+			node = &(*node)->rb_left;
+		} else if (new->ipa > tmp->ipa) {
+			node = &(*node)->rb_right;
+		} else {
+			write_unlock(&mmu->mmu_lock);
+			kfree(new);
+			return;
+		}
+	}
+
+	rb_link_node(&new->node, parent, node);
+	rb_insert_color(&new->node, ipa_root);
+	write_unlock(&mmu->mmu_lock);
+}
+
+/*
+ * Iterate over the lookup table of Canonical IPA to Shadow IPA.
+ * Return Shadow IPA, if the page mapped to Canonical IPA is
+ * also mapped to a Shadow IPA.
+ *
+ */
+bool get_shadow_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa, phys_addr_t *shadow_ipa, long *size)
+{
+	struct rb_node *node;
+	struct mapipa_node *tmp = NULL;
+
+	read_lock(&mmu->mmu_lock);
+	node = mmu->nested_mapipa_root.rb_node;
+
+	while (node) {
+		tmp = container_of(node, struct mapipa_node, node);
+
+		if (tmp->ipa == ipa)
+			break;
+		else if (ipa > tmp->ipa)
+			node = node->rb_right;
+		else
+			node = node->rb_left;
+	}
+
+	read_unlock(&mmu->mmu_lock);
+
+	if (tmp && tmp->ipa == ipa) {
+		*shadow_ipa = tmp->shadow_ipa;
+		*size = tmp->size;
+		write_lock(&mmu->mmu_lock);
+		rb_erase(&tmp->node, &mmu->nested_mapipa_root);
+		write_unlock(&mmu->mmu_lock);
+		kfree(tmp);
+		return true;
+	}
+	return false;
+}
+
 /* Must be called with kvm->mmu_lock held */
 struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu)
 {
@@ -674,6 +756,7 @@ void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu)
 	mmu->tlb_vttbr = 1;
 	mmu->nested_stage2_enabled = false;
 	atomic_set(&mmu->refcnt, 0);
+	mmu->nested_mapipa_root = RB_ROOT;
 }
 
 void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu)
@@ -760,6 +843,36 @@ void kvm_nested_s2_unmap(struct kvm *kvm)
 	}
 }
 
+void kvm_nested_s2_unmap_range(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+	int i;
+	long size;
+	bool ret;
+
+	for (i = 0; i < kvm->arch.nested_mmus_size; i++) {
+		struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];
+
+		if (kvm_s2_mmu_valid(mmu)) {
+			phys_addr_t shadow_ipa, start, end;
+
+			start = range->start << PAGE_SHIFT;
+			end = range->end << PAGE_SHIFT;
+
+			while (start < end) {
+				size = PAGE_SIZE;
+				/*
+				 * get the Shadow IPA if the page is mapped
+				 * to L1 and also mapped to any of active L2.
+				 */
+				ret = get_shadow_ipa(mmu, start, &shadow_ipa, &size);
+				if (ret)
+					kvm_unmap_stage2_range(mmu, shadow_ipa, size);
+				start += size;
+			}
+		}
+	}
+}
+
 /* expects kvm->mmu_lock to be held */
 void kvm_nested_s2_flush(struct kvm *kvm)
 {
-- 
2.40.1
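
For reviewers, the intended flow of the lookup table can be modelled
outside the kernel. The sketch below is illustrative only: a plain
array with linear search stands in for the rb-tree, a printf() stands
in for kvm_unmap_stage2_range(), and map_insert(), map_lookup_erase()
and unmap_range() are made-up names, not kernel or KVM APIs.

/*
 * Standalone userspace model of the canonical-IPA -> shadow-IPA
 * lookup table: record mappings on the "map" path, and on the
 * "unmap" path only touch pages the table knows about.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define MAX_NODES	64

struct map_node {
	uint64_t ipa;		/* canonical IPA (lookup key) */
	uint64_t shadow_ipa;	/* where L1 mapped it in the L2 stage 2 */
	long size;
	bool used;
};

static struct map_node table[MAX_NODES];

/* Record a canonical->shadow mapping when a page is mapped into an L2. */
static void map_insert(uint64_t ipa, uint64_t shadow_ipa, long size)
{
	for (int i = 0; i < MAX_NODES; i++) {
		if (!table[i].used) {
			table[i] = (struct map_node){ ipa, shadow_ipa, size, true };
			return;
		}
	}
}

/* Look up (and drop) the shadow mapping for a canonical IPA, if any. */
static bool map_lookup_erase(uint64_t ipa, uint64_t *shadow_ipa, long *size)
{
	for (int i = 0; i < MAX_NODES; i++) {
		if (table[i].used && table[i].ipa == ipa) {
			*shadow_ipa = table[i].shadow_ipa;
			*size = table[i].size;
			table[i].used = false;
			return true;
		}
	}
	return false;
}

/* Model of kvm_nested_s2_unmap_range(): only unmap recorded pages. */
static void unmap_range(uint64_t start, uint64_t end)
{
	while (start < end) {
		uint64_t shadow_ipa;
		long size = PAGE_SIZE;

		if (map_lookup_erase(start, &shadow_ipa, &size))
			printf("unmap shadow IPA 0x%llx (%ld bytes)\n",
			       (unsigned long long)shadow_ipa, size);
		start += size;
	}
}

int main(void)
{
	/* Two pages of a memslot end up mapped into an L2. */
	map_insert(0x40000000, 0x80000000, PAGE_SIZE);
	map_insert(0x40002000, 0x80002000, PAGE_SIZE);

	/* Unmapping the whole slot only touches the two recorded pages. */
	unmap_range(0x40000000, 0x40004000);
	return 0;
}

In the patch itself the table is an rb_root keyed on the canonical
IPA, so the unmap path pays a logarithmic lookup per page per shadow
MMU instead of walking and invalidating the complete L2 address space.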