When PCIDs are enabled, the most significant bit (bit 63) of the source
operand for a MOV-to-CR3 instruction indicates that the TLB doesn't need
to be flushed. This change enables that optimization for guest
MOV-to-CR3s that KVM intercepts for shadow paging and handles within the
fast CR3 switch path.

Signed-off-by: Junaid Shahid <junaids@xxxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/mmu.c              | 17 ++++++++++++-----
 arch/x86/kvm/vmx.c              | 19 ++++++++++++++-----
 arch/x86/kvm/x86.c              | 12 +++++++++---
 4 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 494bb2d1d1ad..c53499c32f2b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1310,7 +1310,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code,
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
 void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
 void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3,
-		     union kvm_mmu_page_role new_role);
+		     union kvm_mmu_page_role new_role, bool skip_tlb_flush);
 
 void kvm_enable_tdp(void);
 void kvm_disable_tdp(void);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 76030ecb5996..6e198ac0b519 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4026,7 +4026,8 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
 }
 
 static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
-			    union kvm_mmu_page_role new_role)
+			    union kvm_mmu_page_role new_role,
+			    bool skip_tlb_flush)
 {
 	struct kvm_mmu *mmu = &vcpu->arch.mmu;
 
@@ -4059,7 +4060,9 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
 
 			kvm_make_request(KVM_REQ_LOAD_CR3, vcpu);
 			kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
-			kvm_x86_ops->tlb_flush(vcpu, true);
+			if (!skip_tlb_flush)
+				kvm_x86_ops->tlb_flush(vcpu, true);
+
 			__clear_sp_write_flooding_count(
 				page_header(mmu->root_hpa));
 
@@ -4071,9 +4074,9 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3,
 }
 
 void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3,
-		     union kvm_mmu_page_role new_role)
+		     union kvm_mmu_page_role new_role, bool skip_tlb_flush)
 {
-	if (!fast_cr3_switch(vcpu, new_cr3, new_role))
+	if (!fast_cr3_switch(vcpu, new_cr3, new_role, skip_tlb_flush))
 		kvm_mmu_free_roots(vcpu, false);
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_new_cr3);
@@ -5186,11 +5189,15 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 	}
 
+	if (VALID_PAGE(mmu->prev_root.hpa) &&
+	    pcid == kvm_get_pcid(vcpu, mmu->prev_root.cr3))
+		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+
 	++vcpu->stat.invlpg;
 
 	/*
 	 * Mappings not reachable via the current cr3 will be synced when
-	 * switching to that cr3, so nothing needs to be done here for them.
+	 * switching to that cr3, so nothing needs to be synced here for them.
 	 */
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_invpcid_gva);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a1b75a9645f2..ac4e78c2da38 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8682,9 +8682,13 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
 			kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 		}
 
+		if (kvm_get_pcid(vcpu, vcpu->arch.mmu.prev_root.cr3)
+		    == operand.pcid)
+			kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+
 		/*
 		 * If the current cr3 does not use the given PCID, then nothing
-		 * needs to be done here because a resync will happen anyway
+		 * needs to be synced here because a resync will happen anyway
 		 * before switching to any other CR3.
 		 */
 
@@ -10652,13 +10656,17 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
 static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
 {
+	union kvm_mmu_page_role role;
+
 	WARN_ON(mmu_is_nested(vcpu));
 	if (!valid_ept_address(vcpu, nested_ept_get_cr3(vcpu)))
 		return 1;
 
-	kvm_mmu_new_cr3(vcpu, nested_ept_get_cr3(vcpu),
-			kvm_mmu_calc_shadow_ept_root_page_role(vcpu,
-						nested_ept_ad_enabled(vcpu)));
+	role = kvm_mmu_calc_shadow_ept_root_page_role(vcpu,
+						nested_ept_ad_enabled(vcpu));
+
+	kvm_mmu_new_cr3(vcpu, nested_ept_get_cr3(vcpu), role, false);
+
 	kvm_init_shadow_ept_mmu(vcpu,
 			to_vmx(vcpu)->nested.msrs.ept_caps &
 			VMX_EPT_EXECUTE_ONLY_BIT,
@@ -11216,7 +11224,8 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept,
 	}
 
 	if (!nested_ept)
-		kvm_mmu_new_cr3(vcpu, cr3, kvm_mmu_calc_root_page_role(vcpu));
+		kvm_mmu_new_cr3(vcpu, cr3, kvm_mmu_calc_root_page_role(vcpu),
+				false);
 
 	vcpu->arch.cr3 = cr3;
 	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f1b352a2604b..151a8f61c7f1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -847,16 +847,21 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4);
 
 int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
+	bool skip_tlb_flush = false;
 #ifdef CONFIG_X86_64
 	bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
 
-	if (pcid_enabled)
+	if (pcid_enabled) {
+		skip_tlb_flush = cr3 & CR3_PCID_INVD;
 		cr3 &= ~CR3_PCID_INVD;
+	}
 #endif
 
 	if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
 		kvm_mmu_sync_roots(vcpu);
-		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+
+		if (!skip_tlb_flush)
+			kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 		return 0;
 	}
 
@@ -867,7 +872,8 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 	    !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
 		return 1;
 
-	kvm_mmu_new_cr3(vcpu, cr3, kvm_mmu_calc_root_page_role(vcpu));
+	kvm_mmu_new_cr3(vcpu, cr3, kvm_mmu_calc_root_page_role(vcpu),
+			skip_tlb_flush);
 	vcpu->arch.cr3 = cr3;
 	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
-- 
2.18.0.rc1.242.g61856ae69a-goog
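
As an aside for readers unfamiliar with the hardware semantics the patch
relies on: below is a minimal userspace sketch (not part of the patch) of
the bit-63 decomposition that kvm_set_cr3() performs in the x86.c hunk
above. CR3_PCID_INVD mirrors the kernel's definition in kvm_host.h;
decompose_cr3() and CR3_PCID_MASK are illustrative names only, not kernel
API.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors the kernel's CR3_PCID_INVD: when CR4.PCIDE = 1, a set bit 63
 * in the MOV-to-CR3 source operand means the TLB need not be flushed.
 * CR3_PCID_MASK is an illustrative name for the low 12 bits holding
 * the PCID. */
#define CR3_PCID_INVD	(1ULL << 63)
#define CR3_PCID_MASK	0xfffULL

/* Illustrative helper: split a guest MOV-to-CR3 operand the way
 * kvm_set_cr3() does -- latch the no-flush hint, then strip it so the
 * remaining value is the actual CR3 (page-table base + PCID). */
static uint64_t decompose_cr3(uint64_t operand, bool pcid_enabled,
			      bool *skip_tlb_flush)
{
	*skip_tlb_flush = false;
	if (pcid_enabled) {
		*skip_tlb_flush = operand & CR3_PCID_INVD;
		operand &= ~CR3_PCID_INVD;
	}
	return operand;
}

int main(void)
{
	bool skip;
	/* Guest loads CR3 with base 0x12345000, PCID 5, bit 63 set. */
	uint64_t cr3 = decompose_cr3(CR3_PCID_INVD | 0x12345000ULL | 5,
				     true, &skip);

	printf("cr3=%#llx pcid=%llu skip_tlb_flush=%d\n",
	       (unsigned long long)cr3,
	       (unsigned long long)(cr3 & CR3_PCID_MASK), skip);
	return 0;
}

This prints cr3=0x12345005 pcid=5 skip_tlb_flush=1: the hint bit is
remembered but never reaches the value used as an address, which is the
same ordering the patch preserves before the fast CR3 switch path
decides whether to call kvm_x86_ops->tlb_flush().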