On Tue, Jun 12, 2018 at 03:52:30PM -0700, Junaid Shahid wrote: > When using shadow paging, a CR3 switch in the guest results in a VM Exit. > In the common case, that VM exit doesn't require much processing by KVM. > However, it does acquire the MMU lock, which can start showing signs of > contention under some workloads even on a 2 VCPU VM when the guest is > using KPTI. Therefore, we add a fast path that avoids acquiring the MMU > lock in the most common cases e.g. when switching back and forth between > the kernel and user mode CR3s used by KPTI with no guest page table > changes in between. > > For now, this fast path is implemented only for 64-bit guests and hosts > to avoid the handling of PDPTEs, but it can be extended later to 32-bit > guests and/or hosts as well. > > Signed-off-by: Junaid Shahid <junaids@xxxxxxxxxx> > --- > arch/x86/include/asm/kvm_host.h | 13 ++++++- > arch/x86/kvm/mmu.c | 66 ++++++++++++++++++++++++++++++--- > arch/x86/kvm/x86.c | 3 +- > 3 files changed, 74 insertions(+), 8 deletions(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 0ebe659f2802..0869a684f852 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -325,6 +325,14 @@ struct rsvd_bits_validate { > u64 bad_mt_xwr; > }; > > +struct kvm_mmu_root_info { > + gpa_t cr3; > + hpa_t hpa; > +}; > + > +#define KVM_MMU_ROOT_INFO_INVALID \ > + ((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE }) > + > /* > * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit, > * and 2-level 32-bit). 
The kvm_mmu structure abstracts the details of the > @@ -353,6 +361,7 @@ struct kvm_mmu { > u8 shadow_root_level; > u8 ept_ad; > bool direct_map; > + struct kvm_mmu_root_info prev_root; > > /* > * Bitmap; bit set = permission fault > @@ -1279,7 +1288,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); > int kvm_mmu_load(struct kvm_vcpu *vcpu); > void kvm_mmu_unload(struct kvm_vcpu *vcpu); > void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); > -void kvm_mmu_free_roots(struct kvm_vcpu *vcpu); > +void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, bool free_prev_root); > gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, > struct x86_exception *exception); > gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, > @@ -1298,7 +1307,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); > int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code, > void *insn, int insn_len); > void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); > -void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu); > +void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3); > > void kvm_enable_tdp(void); > void kvm_disable_tdp(void); > diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c > index ad2e2a00dc71..0c52b5d1010b 100644 > --- a/arch/x86/kvm/mmu.c > +++ b/arch/x86/kvm/mmu.c > @@ -3396,17 +3396,22 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa, > *root_hpa = INVALID_PAGE; > } > > -void kvm_mmu_free_roots(struct kvm_vcpu *vcpu) > +void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, bool free_prev_root) > { > int i; > LIST_HEAD(invalid_list); > struct kvm_mmu *mmu = &vcpu->arch.mmu; > > - if (!VALID_PAGE(mmu->root_hpa)) > + if (!VALID_PAGE(mmu->root_hpa) && > + (!VALID_PAGE(mmu->prev_root.hpa) || !free_prev_root)) > return; > > spin_lock(&vcpu->kvm->mmu_lock); > > + if (free_prev_root) > + mmu_free_root_page(vcpu->kvm, &mmu->prev_root.hpa, > + &invalid_list); > + > if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL && > (mmu->root_level >= 
PT64_ROOT_4LEVEL || mmu->direct_map)) { > mmu_free_root_page(vcpu->kvm, &mmu->root_hpa, &invalid_list); > @@ -4006,13 +4011,59 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu, > context->root_level = 0; > context->shadow_root_level = PT32E_ROOT_LEVEL; > context->root_hpa = INVALID_PAGE; > + context->prev_root = KVM_MMU_ROOT_INFO_INVALID; > context->direct_map = true; > context->nx = false; > } > > -void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu) > +static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3) > { > - kvm_mmu_free_roots(vcpu); > + struct kvm_mmu *mmu = &vcpu->arch.mmu; > + > + /* > + * For now, limit the fast switch to 64-bit hosts+VMs in order to avoid > + * having to deal with PDPTEs. We may add support for 32-bit hosts/VMs > + * later if necessary. > + */ > + if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL && > + mmu->root_level >= PT64_ROOT_4LEVEL) { > + gpa_t prev_cr3 = mmu->prev_root.cr3; > + > + if (mmu_check_root(vcpu, new_cr3 >> PAGE_SHIFT)) > + return false; > + > + swap(mmu->root_hpa, mmu->prev_root.hpa); > + mmu->prev_root.cr3 = kvm_read_cr3(vcpu); > + > + if (new_cr3 == prev_cr3 && VALID_PAGE(mmu->root_hpa)) { > + /* > + * It is possible that the cached previous root page is > + * obsolete because of a change in the MMU > + * generation number. However, that is accompanied by > + * KVM_REQ_MMU_RELOAD, which will free the root that we > + * have set here and allocate a new one. > + */ > + > + kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); > + __clear_sp_write_flooding_count( > + page_header(mmu->root_hpa)); > + > + vcpu->arch.cr3 = new_cr3; > + __set_bit(VCPU_EXREG_CR3, > + (ulong *)&vcpu->arch.regs_avail); Setting arch.cr3 here is unnecessary; kvm_set_cr3() handles that duty both before and after this patch. It's also confusing because the code in question is later removed in PATCH 06/17, which makes it seem like that patch is introducing a bug. 
> + mmu->set_cr3(vcpu, mmu->root_hpa); > + > + return true; > + } > + } > + > + return false; > +} > + > +void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3) > +{ > + if (!fast_cr3_switch(vcpu, new_cr3)) > + kvm_mmu_free_roots(vcpu, false); > } > > static unsigned long get_cr3(struct kvm_vcpu *vcpu) > @@ -4490,6 +4541,7 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu, > context->update_pte = paging64_update_pte; > context->shadow_root_level = level; > context->root_hpa = INVALID_PAGE; > + context->prev_root = KVM_MMU_ROOT_INFO_INVALID; > context->direct_map = false; > } > > @@ -4520,6 +4572,7 @@ static void paging32_init_context(struct kvm_vcpu *vcpu, > context->update_pte = paging32_update_pte; > context->shadow_root_level = PT32E_ROOT_LEVEL; > context->root_hpa = INVALID_PAGE; > + context->prev_root = KVM_MMU_ROOT_INFO_INVALID; > context->direct_map = false; > } > > @@ -4543,6 +4596,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) > context->update_pte = nonpaging_update_pte; > context->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu); > context->root_hpa = INVALID_PAGE; > + context->prev_root = KVM_MMU_ROOT_INFO_INVALID; > context->direct_map = true; > context->set_cr3 = kvm_x86_ops->set_tdp_cr3; > context->get_cr3 = get_cr3; > @@ -4625,6 +4679,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, > context->update_pte = ept_update_pte; > context->root_level = PT64_ROOT_4LEVEL; > context->root_hpa = INVALID_PAGE; > + context->prev_root = KVM_MMU_ROOT_INFO_INVALID; > context->direct_map = false; > context->base_role.ad_disabled = !accessed_dirty; > context->base_role.guest_mode = 1; > @@ -4727,7 +4782,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load); > > void kvm_mmu_unload(struct kvm_vcpu *vcpu) > { > - kvm_mmu_free_roots(vcpu); > + kvm_mmu_free_roots(vcpu, true); > WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa)); > } > EXPORT_SYMBOL_GPL(kvm_mmu_unload); > @@ -5107,6 +5162,7 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu) > { 
> vcpu->arch.walk_mmu = &vcpu->arch.mmu; > vcpu->arch.mmu.root_hpa = INVALID_PAGE; > + vcpu->arch.mmu.prev_root = KVM_MMU_ROOT_INFO_INVALID; > vcpu->arch.mmu.translate_gpa = translate_gpa; > vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa; > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 06dd4cdb2ca8..2d1041b5739b 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -867,9 +867,10 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) > !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) > return 1; > > + kvm_mmu_new_cr3(vcpu, cr3); > vcpu->arch.cr3 = cr3; > __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); > - kvm_mmu_new_cr3(vcpu); > + > return 0; > } > EXPORT_SYMBOL_GPL(kvm_set_cr3); > -- > 2.18.0.rc1.242.g61856ae69a-goog >