Adds support for storing multiple previous CR3/root_hpa pairs maintained as an LRU cache, so that the lockless CR3 switch path can be used when switching back to any of them. Signed-off-by: Junaid Shahid <junaids@xxxxxxxxxx> --- arch/x86/include/asm/kvm_host.h | 10 +-- arch/x86/kvm/mmu.c | 111 ++++++++++++++++++++++---------- arch/x86/kvm/vmx.c | 12 ++-- 3 files changed, 92 insertions(+), 41 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index df383a05e42a..75e9baea5dea 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -334,6 +334,8 @@ struct kvm_mmu_root_info { #define KVM_MMU_ROOT_INFO_INVALID \ ((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE }) +#define KVM_MMU_NUM_PREV_ROOTS 3 + /* * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit, * and 2-level 32-bit). The kvm_mmu structure abstracts the details of the @@ -362,7 +364,7 @@ struct kvm_mmu { u8 shadow_root_level; u8 ept_ad; bool direct_map; - struct kvm_mmu_root_info prev_root; + struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS]; /* * Bitmap; bit set = permission fault @@ -1284,9 +1286,9 @@ static inline int __kvm_irq_line_state(unsigned long *irq_state, return !!(*irq_state); } -#define KVM_MMU_ROOT_CURRENT BIT(0) -#define KVM_MMU_ROOT_PREVIOUS BIT(1) -#define KVM_MMU_ROOTS_ALL (~0UL) +#define KVM_MMU_ROOT_CURRENT BIT(0) +#define KVM_MMU_ROOT_PREVIOUS(i) BIT(1+i) +#define KVM_MMU_ROOTS_ALL (~0UL) int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level); void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index c09331d9f330..d01380850848 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3434,18 +3434,26 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, ulong roots_to_free) LIST_HEAD(invalid_list); struct kvm_mmu *mmu = &vcpu->arch.mmu; bool free_active_root = roots_to_free & KVM_MMU_ROOT_CURRENT; - bool free_prev_root = roots_to_free & KVM_MMU_ROOT_PREVIOUS; + + BUILD_BUG_ON(KVM_MMU_NUM_PREV_ROOTS >= BITS_PER_LONG); /* Before acquiring the MMU lock, see if we need to do any real work. */ - if (!(free_active_root && VALID_PAGE(mmu->root_hpa)) && - !(free_prev_root && VALID_PAGE(mmu->prev_root.hpa))) - return; + if (!(free_active_root && VALID_PAGE(mmu->root_hpa))) { + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) + if ((roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)) && + VALID_PAGE(mmu->prev_roots[i].hpa)) + break; + + if (i == KVM_MMU_NUM_PREV_ROOTS) + return; + } spin_lock(&vcpu->kvm->mmu_lock); - if (free_prev_root) - mmu_free_root_page(vcpu->kvm, &mmu->prev_root.hpa, - &invalid_list); + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) + if (roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)) + mmu_free_root_page(vcpu->kvm, &mmu->prev_roots[i].hpa, + &invalid_list); if (free_active_root) { if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL && @@ -4053,6 +4061,38 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu, context->nx = false; } +/* + * Find out if a previously cached root matching the new CR3/role is available. + * The current root is also inserted into the cache. + * If a matching root was found, it is assigned to kvm_mmu->root_hpa and true is + * returned. + * Otherwise, the LRU root from the cache is assigned to kvm_mmu->root_hpa and + * false is returned. This root should now be freed by the caller. + */ +static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3, + union kvm_mmu_page_role new_role) +{ + uint i; + struct kvm_mmu_root_info root; + struct kvm_mmu *mmu = &vcpu->arch.mmu; + + root.cr3 = mmu->get_cr3(vcpu); + root.hpa = mmu->root_hpa; + + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { + swap(root, mmu->prev_roots[i]); + + if (new_cr3 == root.cr3 && VALID_PAGE(root.hpa) && + page_header(root.hpa) != NULL && + new_role.word == page_header(root.hpa)->role.word) + break; + } + + mmu->root_hpa = root.hpa; + + return i < KVM_MMU_NUM_PREV_ROOTS; +} + static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3, union kvm_mmu_page_role new_role, bool skip_tlb_flush) @@ -4066,18 +4106,10 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3, */ if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL && mmu->root_level >= PT64_ROOT_4LEVEL) { - gpa_t prev_cr3 = mmu->prev_root.cr3; - if (mmu_check_root(vcpu, new_cr3 >> PAGE_SHIFT)) return false; - swap(mmu->root_hpa, mmu->prev_root.hpa); - mmu->prev_root.cr3 = mmu->get_cr3(vcpu); - - if (new_cr3 == prev_cr3 && - VALID_PAGE(mmu->root_hpa) && - page_header(mmu->root_hpa) != NULL && - new_role.word == page_header(mmu->root_hpa)->role.word) { + if (cached_root_available(vcpu, new_cr3, new_role)) { /* * It is possible that the cached previous root page is * obsolete because of a change in the MMU @@ -4842,8 +4874,12 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots) { if (reset_roots) { + uint i; + vcpu->arch.mmu.root_hpa = INVALID_PAGE; - vcpu->arch.mmu.prev_root = KVM_MMU_ROOT_INFO_INVALID; + + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) + vcpu->arch.mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; } if (mmu_is_nested(vcpu)) @@ -5213,22 +5249,24 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) { struct kvm_mmu *mmu = &vcpu->arch.mmu; + uint i; mmu->invlpg(vcpu, gva, mmu->root_hpa); /* * INVLPG is required to invalidate any global mappings for the VA, * irrespective of PCID. Since it would take us roughly similar amount - * of work to determine whether the prev_root mapping of the VA is - * marked global, or to just sync it blindly, so we might as well just - * always sync it. + * of work to determine whether any of the prev_root mappings of the VA + * is marked global, or to just sync it blindly, so we might as well + * just always sync it. * - * Mappings not reachable via the current cr3 or the prev_root.cr3 will - * be synced when switching to that cr3, so nothing needs to be done - * here for them. + * Mappings not reachable via the current cr3 or the prev_roots will be + * synced when switching to that cr3, so nothing needs to be done here + * for them. */ - if (VALID_PAGE(mmu->prev_root.hpa)) - mmu->invlpg(vcpu, gva, mmu->prev_root.hpa); + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) + if (VALID_PAGE(mmu->prev_roots[i].hpa)) + mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa); kvm_x86_ops->tlb_flush_gva(vcpu, gva); ++vcpu->stat.invlpg; @@ -5239,16 +5277,19 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid) { struct kvm_mmu *mmu = &vcpu->arch.mmu; bool tlb_flush = false; + uint i; if (pcid == kvm_get_active_pcid(vcpu)) { mmu->invlpg(vcpu, gva, mmu->root_hpa); tlb_flush = true; } - if (VALID_PAGE(mmu->prev_root.hpa) && - pcid == kvm_get_pcid(vcpu, mmu->prev_root.cr3)) { - mmu->invlpg(vcpu, gva, mmu->prev_root.hpa); - tlb_flush = true; + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { + if (VALID_PAGE(mmu->prev_roots[i].hpa) && + pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].cr3)) { + mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa); + tlb_flush = true; + } } if (tlb_flush) @@ -5257,9 +5298,9 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid) ++vcpu->stat.invlpg; /* - * Mappings not reachable via the current cr3 or the prev_root.cr3 will - * be synced when switching to that cr3, so nothing needs to be done - * here for them. + * Mappings not reachable via the current cr3 or the prev_roots will be + * synced when switching to that cr3, so nothing needs to be done here + * for them. */ } EXPORT_SYMBOL_GPL(kvm_mmu_invpcid_gva); @@ -5305,12 +5346,16 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu) int kvm_mmu_create(struct kvm_vcpu *vcpu) { + uint i; + vcpu->arch.walk_mmu = &vcpu->arch.mmu; vcpu->arch.mmu.root_hpa = INVALID_PAGE; - vcpu->arch.mmu.prev_root = KVM_MMU_ROOT_INFO_INVALID; vcpu->arch.mmu.translate_gpa = translate_gpa; vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa; + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) + vcpu->arch.mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; + return alloc_mmu_pages(vcpu); } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d83b9dad8a9f..f5725fd48b54 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8647,6 +8647,8 @@ static int handle_invpcid(struct kvm_vcpu *vcpu) bool pcid_enabled; gva_t gva; struct x86_exception e; + uint i; + ulong roots_to_free = 0; struct { u64 pcid; u64 gla; @@ -8706,13 +8708,15 @@ static int handle_invpcid(struct kvm_vcpu *vcpu) kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } - if (kvm_get_pcid(vcpu, vcpu->arch.mmu.prev_root.cr3) - == operand.pcid) - kvm_mmu_free_roots(vcpu, KVM_MMU_ROOT_PREVIOUS); + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) + if (kvm_get_pcid(vcpu, vcpu->arch.mmu.prev_roots[i].cr3) + == operand.pcid) + roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i); + kvm_mmu_free_roots(vcpu, roots_to_free); /* - * If neither the current cr3 nor the prev_root.cr3 use the + * If neither the current cr3 nor any of the prev_roots use the * given PCID, then nothing needs to be done here because a * resync will happen anyway before switching to any other CR3. */ -- 2.18.0.rc1.242.g61856ae69a-goog