From: "Maciej S. Szmigiero" <maciej.szmigiero@xxxxxxxxxx> When retpolines are enabled they have high overhead in the inner loop inside kvm_handle_hva_range() that iterates over the provided memory area. Implement a static dispatch there, just like commit 7a02674d154d ("KVM: x86/mmu: Avoid retpoline on ->page_fault() with TDP") did for the MMU page fault handler. This significantly improves performance on the unmap test on the existing kernel memslot code (tested on a Xeon 8167M machine): 30 slots in use: Test Before After Improvement Unmap 0.0368s 0.0353s 4% Unmap 2M 0.000952s 0.000431s 55% 509 slots in use: Unmap 0.0872s 0.0777s 11% Unmap 2M 0.00236s 0.00168s 29% Looks like performing this indirect call via a retpoline might have interfered with unrolling of the whole loop in the CPU. Provide such static dispatch only for kvm_unmap_rmapp() and kvm_age_rmapp() and their TDP MMU equivalents since other handlers are called in ranges of single byte only, so they already have high overhead to begin with if walking over a large memory area. Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@xxxxxxxxxx> --- arch/x86/kvm/mmu/mmu.c | 59 +++++++++++++------ arch/x86/kvm/mmu/tdp_mmu.c | 116 ++++++++++++++++++++++--------------- 2 files changed, 112 insertions(+), 63 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 6d16481aa29d..4140e308cf30 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1456,6 +1456,45 @@ static void slot_rmap_walk_next(struct slot_rmap_walk_iterator *iterator) slot_rmap_walk_okay(_iter_); \ slot_rmap_walk_next(_iter_)) +static int kvm_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, + struct kvm_memory_slot *slot, gfn_t gfn, int level, + unsigned long data) +{ + u64 *sptep; + struct rmap_iterator iter; + int young = 0; + + for_each_rmap_spte(rmap_head, &iter, sptep) + young |= mmu_spte_age(sptep); + + trace_kvm_age_page(gfn, level, slot, young); + return young; +} + +static int kvm_handle_hva_do(struct kvm *kvm, + struct slot_rmap_walk_iterator *iterator, + struct kvm_memory_slot *memslot, + unsigned long data, + int (*handler)(struct kvm *kvm, + struct kvm_rmap_head *rmap_head, + struct kvm_memory_slot *slot, + gfn_t gfn, + int level, + unsigned long data)) +{ +#ifdef CONFIG_RETPOLINE + if (handler == kvm_unmap_rmapp) + return kvm_unmap_rmapp(kvm, iterator->rmap, memslot, + iterator->gfn, iterator->level, data); + else if (handler == kvm_age_rmapp) + return kvm_age_rmapp(kvm, iterator->rmap, memslot, + iterator->gfn, iterator->level, data); + else +#endif + return handler(kvm, iterator->rmap, memslot, + iterator->gfn, iterator->level, data); +} + static int kvm_handle_hva_range(struct kvm *kvm, unsigned long start, unsigned long end, @@ -1495,8 +1534,9 @@ static int kvm_handle_hva_range(struct kvm *kvm, KVM_MAX_HUGEPAGE_LEVEL, gfn_start, gfn_end - 1, &iterator) - ret |= handler(kvm, iterator.rmap, memslot, - iterator.gfn, iterator.level, data); + ret |= kvm_handle_hva_do(kvm, &iterator, + memslot, data, + handler); } } @@ -1539,21 +1579,6 @@ int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) return r; } -static int kvm_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, - struct kvm_memory_slot *slot, gfn_t gfn, int level, - unsigned long data) -{ - u64 *sptep; - struct rmap_iterator iter; - int young = 0; - - for_each_rmap_spte(rmap_head, &iter, sptep) - young |= mmu_spte_age(sptep); - - trace_kvm_age_page(gfn, level, slot, young); - return young; -} - static int kvm_test_age_rmapp(struct 
 			      struct kvm_memory_slot *slot, gfn_t gfn, int level,
 			      unsigned long data)
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 2ef8615f9dba..f666b0fab861 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -639,45 +639,6 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
 	return ret;
 }
 
-static int kvm_tdp_mmu_handle_hva_range(struct kvm *kvm, unsigned long start,
-		unsigned long end, unsigned long data,
-		int (*handler)(struct kvm *kvm, struct kvm_memory_slot *slot,
-			       struct kvm_mmu_page *root, gfn_t start,
-			       gfn_t end, unsigned long data))
-{
-	struct kvm_memslots *slots;
-	struct kvm_memory_slot *memslot;
-	struct kvm_mmu_page *root;
-	int ret = 0;
-	int as_id;
-
-	for_each_tdp_mmu_root_yield_safe(kvm, root) {
-		as_id = kvm_mmu_page_as_id(root);
-		slots = __kvm_memslots(kvm, as_id);
-		kvm_for_each_memslot(memslot, slots) {
-			unsigned long hva_start, hva_end;
-			gfn_t gfn_start, gfn_end;
-
-			hva_start = max(start, memslot->userspace_addr);
-			hva_end = min(end, memslot->userspace_addr +
-				      (memslot->npages << PAGE_SHIFT));
-			if (hva_start >= hva_end)
-				continue;
-			/*
-			 * {gfn(page) | page intersects with [hva_start, hva_end)} =
-			 * {gfn_start, gfn_start+1, ..., gfn_end-1}.
-			 */
-			gfn_start = hva_to_gfn_memslot(hva_start, memslot);
-			gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
-
-			ret |= handler(kvm, memslot, root, gfn_start,
-				       gfn_end, data);
-		}
-	}
-
-	return ret;
-}
-
 static int zap_gfn_range_hva_wrapper(struct kvm *kvm,
 				     struct kvm_memory_slot *slot,
 				     struct kvm_mmu_page *root, gfn_t start,
@@ -686,13 +647,6 @@ static int zap_gfn_range_hva_wrapper(struct kvm *kvm,
 	return zap_gfn_range(kvm, root, start, end, false);
 }
 
-int kvm_tdp_mmu_zap_hva_range(struct kvm *kvm, unsigned long start,
-			      unsigned long end)
-{
-	return kvm_tdp_mmu_handle_hva_range(kvm, start, end, 0,
-					    zap_gfn_range_hva_wrapper);
-}
-
 /*
  * Mark the SPTEs range of GFNs [start, end) unaccessed and return non-zero
  * if any of the GFNs in the range have been accessed.
@@ -739,6 +693,76 @@ static int age_gfn_range(struct kvm *kvm, struct kvm_memory_slot *slot,
 	return young;
 }
 
+static int kvm_tdp_mmu_handle_hva_do(struct kvm *kvm,
+				     struct kvm_memory_slot *slot,
+				     struct kvm_mmu_page *root,
+				     gfn_t start, gfn_t end,
+				     unsigned long data,
+				     int (*handler)(struct kvm *kvm,
+						    struct kvm_memory_slot *slot,
+						    struct kvm_mmu_page *root,
+						    gfn_t start, gfn_t end,
+						    unsigned long data))
+{
+#ifdef CONFIG_RETPOLINE
+	if (handler == zap_gfn_range_hva_wrapper)
+		return zap_gfn_range_hva_wrapper(kvm, slot, root,
+						 start, end, data);
+	else if (handler == age_gfn_range)
+		return age_gfn_range(kvm, slot, root, start, end,
+				     data);
+	else
+#endif
+		return handler(kvm, slot, root, start, end, data);
+}
+
+static int kvm_tdp_mmu_handle_hva_range(struct kvm *kvm, unsigned long start,
+		unsigned long end, unsigned long data,
+		int (*handler)(struct kvm *kvm, struct kvm_memory_slot *slot,
+			       struct kvm_mmu_page *root, gfn_t start,
+			       gfn_t end, unsigned long data))
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	struct kvm_mmu_page *root;
+	int ret = 0;
+	int as_id;
+
+	for_each_tdp_mmu_root_yield_safe(kvm, root) {
+		as_id = kvm_mmu_page_as_id(root);
+		slots = __kvm_memslots(kvm, as_id);
+		kvm_for_each_memslot(memslot, slots) {
+			unsigned long hva_start, hva_end;
+			gfn_t gfn_start, gfn_end;
+
+			hva_start = max(start, memslot->userspace_addr);
+			hva_end = min(end, memslot->userspace_addr +
+				      (memslot->npages << PAGE_SHIFT));
+			if (hva_start >= hva_end)
+				continue;
+			/*
+			 * {gfn(page) | page intersects with [hva_start, hva_end)} =
+			 * {gfn_start, gfn_start+1, ..., gfn_end-1}.
+			 */
+			gfn_start = hva_to_gfn_memslot(hva_start, memslot);
+			gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
+
+			ret |= kvm_tdp_mmu_handle_hva_do(kvm, memslot, root,
+							 gfn_start, gfn_end,
+							 data, handler);
+		}
+	}
+
+	return ret;
+}
+
+int kvm_tdp_mmu_zap_hva_range(struct kvm *kvm, unsigned long start,
+			      unsigned long end)
+{
+	return kvm_tdp_mmu_handle_hva_range(kvm, start, end, 0,
+					    zap_gfn_range_hva_wrapper);
+}
+
 int kvm_tdp_mmu_age_hva_range(struct kvm *kvm, unsigned long start,
 			      unsigned long end)
 {