Dirty logging ultimately breaks down MMU mappings to 4K granularity. When
dirty logging is no longer needed, these granular mappings represent a
useless performance penalty. When dirty logging is disabled, search the
paging structure for mappings that could be reconstituted into large page
mappings. Zap those mappings so that they can be faulted in again at a
higher mapping level.

Tested by running kvm-unit-tests and KVM selftests on an Intel Haswell
machine. This series introduced no new failures.

This series can be viewed in Gerrit at:
	https://linux-review.googlesource.com/c/virt/kvm/kvm/+/2538

Signed-off-by: Ben Gardon <bgardon@xxxxxxxxxx>
---
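
For background, the path added here runs when userspace clears
KVM_MEM_LOG_DIRTY_PAGES on a memslot via KVM_SET_USER_MEMORY_REGION;
kvm_arch_commit_memory_region() then invokes kvm_mmu_zap_collapsible_sptes()
and, with this patch, its TDP MMU counterpart. A minimal userspace sketch
(illustrative only, not part of this patch; fd and slot setup assumed):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	/* Toggle dirty logging on an existing, already-populated memslot. */
	static int set_dirty_logging(int vm_fd,
				     struct kvm_userspace_memory_region *region,
				     int enable)
	{
		region->flags = enable ? KVM_MEM_LOG_DIRTY_PAGES : 0;
		return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, region);
	}

A VMM would call set_dirty_logging(vm_fd, &region, 0) once migration
completes or aborts; the mappings zapped by this patch are then faulted
back in at 2M/1G granularity.
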
 arch/x86/kvm/mmu/mmu.c     |  3 ++
 arch/x86/kvm/mmu/tdp_mmu.c | 62 ++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/mmu/tdp_mmu.h |  2 ++
 3 files changed, 67 insertions(+)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index b9074603f9df1..12892fc4f146d 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -6025,6 +6025,9 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
 	spin_lock(&kvm->mmu_lock);
 	slot_handle_leaf(kvm, (struct kvm_memory_slot *)memslot,
 			 kvm_mmu_zap_collapsible_spte, true);
+
+	if (kvm->arch.tdp_mmu_enabled)
+		kvm_tdp_mmu_zap_collapsible_sptes(kvm, memslot);
 	spin_unlock(&kvm->mmu_lock);
 }
 
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index e5cb7f0ec23e8..a2895119655ac 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1099,3 +1099,65 @@ bool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot)
 
 	return spte_set;
 }
+/*
+ * Clear non-leaf entries (and free associated page tables) which could
+ * be replaced by large mappings, for GFNs within the slot.
+ */
+static void zap_collapsible_spte_range(struct kvm *kvm,
+				       struct kvm_mmu_page *root,
+				       gfn_t start, gfn_t end)
+{
+	struct tdp_iter iter;
+	kvm_pfn_t pfn;
+	bool spte_set = false;
+	int as_id = kvm_mmu_page_as_id(root);
+
+	for_each_tdp_pte_root(iter, root, start, end) {
+		if (!is_shadow_present_pte(iter.old_spte) ||
+		    is_last_spte(iter.old_spte, iter.level))
+			continue;
+
+		pfn = spte_to_pfn(iter.old_spte);
+		if (kvm_is_reserved_pfn(pfn) ||
+		    !PageTransCompoundMap(pfn_to_page(pfn)))
+			continue;
+
+		*iter.sptep = 0;
+		handle_changed_spte(kvm, as_id, iter.gfn, iter.old_spte,
+				    0, iter.level);
+		spte_set = true;
+
+		spte_set = !tdp_mmu_iter_cond_resched(kvm, &iter);
+	}
+
+	if (spte_set)
+		kvm_flush_remote_tlbs(kvm);
+}
+
+/*
+ * Clear non-leaf entries (and free associated page tables) which could
+ * be replaced by large mappings, for GFNs within the slot.
+ */
+void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
+				       const struct kvm_memory_slot *slot)
+{
+	struct kvm_mmu_page *root;
+	int root_as_id;
+
+	for_each_tdp_mmu_root(kvm, root) {
+		root_as_id = kvm_mmu_page_as_id(root);
+		if (root_as_id != slot->as_id)
+			continue;
+
+		/*
+		 * Take a reference on the root so that it cannot be freed if
+		 * this thread releases the MMU lock and yields in this loop.
+		 */
+		get_tdp_mmu_root(kvm, root);
+
+		zap_collapsible_spte_range(kvm, root, slot->base_gfn,
+					   slot->base_gfn + slot->npages);
+
+		put_tdp_mmu_root(kvm, root);
+	}
+}
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 2c9322ba3462b..10e70699c5372 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -38,4 +38,6 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
 				       gfn_t gfn, unsigned long mask,
 				       bool wrprot);
 bool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot);
+void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
+				       const struct kvm_memory_slot *slot);
 #endif /* __KVM_X86_MMU_TDP_MMU_H */
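
The spte_set = !tdp_mmu_iter_cond_resched(kvm, &iter); pattern in
zap_collapsible_spte_range() relies on the yield helper added earlier in
this series flushing remote TLBs before dropping the MMU lock, so a yield
leaves no zap pending a flush. Roughly, the assumed behavior (a sketch for
review convenience; the earlier patch in the series is authoritative):

	static bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
					      struct tdp_iter *iter)
	{
		if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
			/*
			 * Flush before yielding so no zapped-but-unflushed
			 * mapping survives while the lock is dropped.
			 */
			kvm_flush_remote_tlbs(kvm);
			cond_resched_lock(&kvm->mmu_lock);
			tdp_iter_refresh_walk(iter);
			return true;
		}

		return false;
	}

-- 
2.28.0.709.gb0816b6eb0-goog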