On Tue, Jun 12, 2018 at 03:52:39PM -0700, Junaid Shahid wrote: > When PCIDs are enabled, the MSb of the source operand for a MOV-to-CR3 > instruction indicates that the TLB doesn't need to be flushed. > > This change enables this optimization for MOV-to-CR3s in the guest > that have been intercepted by KVM for shadow paging and are handled > within the fast CR3 switch path. > > Signed-off-by: Junaid Shahid <junaids@xxxxxxxxxx> > --- > arch/x86/include/asm/kvm_host.h | 2 +- > arch/x86/kvm/mmu.c | 17 ++++++++++++----- > arch/x86/kvm/vmx.c | 19 ++++++++++++++----- > arch/x86/kvm/x86.c | 12 +++++++++--- > 4 files changed, 36 insertions(+), 14 deletions(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 494bb2d1d1ad..c53499c32f2b 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -1310,7 +1310,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code, > void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); > void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid); > void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, > - union kvm_mmu_page_role new_role); > + union kvm_mmu_page_role new_role, bool skip_tlb_flush); > > void kvm_enable_tdp(void); > void kvm_disable_tdp(void); > diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c > index 76030ecb5996..6e198ac0b519 100644 > --- a/arch/x86/kvm/mmu.c > +++ b/arch/x86/kvm/mmu.c > @@ -4026,7 +4026,8 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu, > } > > static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3, > - union kvm_mmu_page_role new_role) > + union kvm_mmu_page_role new_role, > + bool skip_tlb_flush) > { > struct kvm_mmu *mmu = &vcpu->arch.mmu; > > @@ -4059,7 +4060,9 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3, > > kvm_make_request(KVM_REQ_LOAD_CR3, vcpu); > kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); > - kvm_x86_ops->tlb_flush(vcpu, true); > + if (!skip_tlb_flush) > + kvm_x86_ops->tlb_flush(vcpu, true); > + > __clear_sp_write_flooding_count( > page_header(mmu->root_hpa)); > > @@ -4071,9 +4074,9 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3, > } > > void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, > - union kvm_mmu_page_role new_role) > + union kvm_mmu_page_role new_role, bool skip_tlb_flush) > { > - if (!fast_cr3_switch(vcpu, new_cr3, new_role)) > + if (!fast_cr3_switch(vcpu, new_cr3, new_role, skip_tlb_flush)) > kvm_mmu_free_roots(vcpu, false); > } > EXPORT_SYMBOL_GPL(kvm_mmu_new_cr3); > @@ -5186,11 +5189,15 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid) > kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); > } > > + if (VALID_PAGE(mmu->prev_root.hpa) && > + pcid == kvm_get_pcid(vcpu, mmu->prev_root.cr3)) > + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); > + > ++vcpu->stat.invlpg; > > /* > * Mappings not reachable via the current cr3 will be synced when > - * switching to that cr3, so nothing needs to be done here for them. > + * switching to that cr3, so nothing needs to be synced here for them. Hmm, IMO this comment and the similar comment for the single context case should either be updated to explicitly call out that we also need to flush the TLB if the PCID matches that of the fast CR3 cache, or simply removed altogether. > */ > } > EXPORT_SYMBOL_GPL(kvm_mmu_invpcid_gva); > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index a1b75a9645f2..ac4e78c2da38 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -8682,9 +8682,13 @@ static int handle_invpcid(struct kvm_vcpu *vcpu) > kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); > } > > + if (kvm_get_pcid(vcpu, vcpu->arch.mmu.prev_root.cr3) > + == operand.pcid) > + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); > + > /* > * If the current cr3 does not use the given PCID, then nothing > - * needs to be done here because a resync will happen anyway > + * needs to be synced here because a resync will happen anyway > * before switching to any other CR3. > */ > > @@ -10652,13 +10656,17 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu) > > static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) > { > + union kvm_mmu_page_role role; > + > WARN_ON(mmu_is_nested(vcpu)); > if (!valid_ept_address(vcpu, nested_ept_get_cr3(vcpu))) > return 1; > > - kvm_mmu_new_cr3(vcpu, nested_ept_get_cr3(vcpu), > - kvm_mmu_calc_shadow_ept_root_page_role(vcpu, > - nested_ept_ad_enabled(vcpu))); > + role = kvm_mmu_calc_shadow_ept_root_page_role(vcpu, > + nested_ept_ad_enabled(vcpu)); > + > + kvm_mmu_new_cr3(vcpu, nested_ept_get_cr3(vcpu), role, false); > + > kvm_init_shadow_ept_mmu(vcpu, > to_vmx(vcpu)->nested.msrs.ept_caps & > VMX_EPT_EXECUTE_ONLY_BIT, > @@ -11216,7 +11224,8 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne > } > > if (!nested_ept) > - kvm_mmu_new_cr3(vcpu, cr3, kvm_mmu_calc_root_page_role(vcpu)); > + kvm_mmu_new_cr3(vcpu, cr3, kvm_mmu_calc_root_page_role(vcpu), > + false); > > vcpu->arch.cr3 = cr3; > __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index f1b352a2604b..151a8f61c7f1 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -847,16 +847,21 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4); > > int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) > { > + bool skip_tlb_flush = false; > #ifdef CONFIG_X86_64 > bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE); > > - if (pcid_enabled) > + if (pcid_enabled) { > + skip_tlb_flush = cr3 & CR3_PCID_INVD; > cr3 &= ~CR3_PCID_INVD; While you're here, could you remove KVM's CR3_PCID_INVD define and use the kernel's X86_CR3_PCID_NOFLUSH define? Duplication aside, CR3_PCID_INVD is a terrible name since it inverts the logic. > + } > #endif > > if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) { > kvm_mmu_sync_roots(vcpu); > - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); > + > + if (!skip_tlb_flush) > + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); > return 0; > } > > @@ -867,7 +872,8 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) > !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) > return 1; > > - kvm_mmu_new_cr3(vcpu, cr3, kvm_mmu_calc_root_page_role(vcpu)); > + kvm_mmu_new_cr3(vcpu, cr3, kvm_mmu_calc_root_page_role(vcpu), > + skip_tlb_flush); > vcpu->arch.cr3 = cr3; > __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); > > -- > 2.18.0.rc1.242.g61856ae69a-goog >