Zapping a root means scanning for present entries in a page-table hierarchy. This process is relatively slow since it needs to be preemptible as millions of entries might be processed. Furthermore, the root-page is traversed multiple times as zapping is done with increasing page-sizes. Optimizing for the not-present case speeds up the hello microbenchmark by 115 microseconds. Signed-off-by: Bernhard Kauer <bk@xxxxxxxxx> --- arch/x86/kvm/mmu/tdp_iter.h | 21 +++++++++++++++++++++ arch/x86/kvm/mmu/tdp_mmu.c | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h index 2880fd392e0c..7ad28ac2c6b8 100644 --- a/arch/x86/kvm/mmu/tdp_iter.h +++ b/arch/x86/kvm/mmu/tdp_iter.h @@ -130,6 +130,27 @@ struct tdp_iter { #define for_each_tdp_pte(iter, root, start, end) \ for_each_tdp_pte_min_level(iter, root, PG_LEVEL_4K, start, end) + +/* + * Skip up to count not present entries of the iterator. Returns true + * if the final entry is not present. 
+ */ +static inline bool tdp_iter_skip_not_present(struct tdp_iter *iter, int count) +{ + int i; + int pos; + + pos = SPTE_INDEX(iter->gfn << PAGE_SHIFT, iter->level); + count = min(count, SPTE_ENT_PER_PAGE - 1 - pos); + for (i = 0; i < count && !is_shadow_present_pte(iter->old_spte); i++) + iter->old_spte = kvm_tdp_mmu_read_spte(iter->sptep + i + 1); + + iter->gfn += i * KVM_PAGES_PER_HPAGE(iter->level); + iter->next_last_level_gfn = iter->gfn; + iter->sptep += i; + return !is_shadow_present_pte(iter->old_spte); +} + tdp_ptep_t spte_to_child_pt(u64 pte, int level); void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root, diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 1951f76db657..404726511f95 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -750,7 +750,7 @@ static void __tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root, if (tdp_mmu_iter_cond_resched(kvm, &iter, false, shared)) continue; - if (!is_shadow_present_pte(iter.old_spte)) + if (tdp_iter_skip_not_present(&iter, 32)) continue; if (iter.level > zap_level) -- 2.45.2