On Wed, Jun 13, 2018 at 11:39:05AM -0700, Sean Christopherson wrote: > On Tue, Jun 12, 2018 at 03:52:39PM -0700, Junaid Shahid wrote: > > When PCIDs are enabled, the MSb of the source operand for a MOV-to-CR3 > > instruction indicates that the TLB doesn't need to be flushed. > > > > This change enables this optimization for MOV-to-CR3s in the guest > > that have been intercepted by KVM for shadow paging and are handled > > within the fast CR3 switch path. > > > > Signed-off-by: Junaid Shahid <junaids@xxxxxxxxxx> > > --- > > arch/x86/include/asm/kvm_host.h | 2 +- > > arch/x86/kvm/mmu.c | 17 ++++++++++++----- > > arch/x86/kvm/vmx.c | 19 ++++++++++++++----- > > arch/x86/kvm/x86.c | 12 +++++++++--- > > 4 files changed, 36 insertions(+), 14 deletions(-) > > > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > > index 494bb2d1d1ad..c53499c32f2b 100644 > > --- a/arch/x86/include/asm/kvm_host.h > > +++ b/arch/x86/include/asm/kvm_host.h > > @@ -1310,7 +1310,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code, > > void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); > > void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid); > > void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, > > - union kvm_mmu_page_role new_role); > > + union kvm_mmu_page_role new_role, bool skip_tlb_flush); > > > > void kvm_enable_tdp(void); > > void kvm_disable_tdp(void); > > diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c > > index 76030ecb5996..6e198ac0b519 100644 > > --- a/arch/x86/kvm/mmu.c > > +++ b/arch/x86/kvm/mmu.c > > @@ -4026,7 +4026,8 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu, > > } > > > > static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3, > > - union kvm_mmu_page_role new_role) > > + union kvm_mmu_page_role new_role, > > + bool skip_tlb_flush) > > { > > struct kvm_mmu *mmu = &vcpu->arch.mmu; > > > > @@ -4059,7 +4060,9 @@ static bool fast_cr3_switch(struct 
kvm_vcpu *vcpu, gpa_t new_cr3, > > > > kvm_make_request(KVM_REQ_LOAD_CR3, vcpu); > > kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); > > - kvm_x86_ops->tlb_flush(vcpu, true); > > + if (!skip_tlb_flush) > > + kvm_x86_ops->tlb_flush(vcpu, true); > > + > > __clear_sp_write_flooding_count( > > page_header(mmu->root_hpa)); > > > > @@ -4071,9 +4074,9 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3, > > } > > > > void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, > > - union kvm_mmu_page_role new_role) > > + union kvm_mmu_page_role new_role, bool skip_tlb_flush) > > { > > - if (!fast_cr3_switch(vcpu, new_cr3, new_role)) > > + if (!fast_cr3_switch(vcpu, new_cr3, new_role, skip_tlb_flush)) > > kvm_mmu_free_roots(vcpu, false); > > } > > EXPORT_SYMBOL_GPL(kvm_mmu_new_cr3); > > @@ -5186,11 +5189,15 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid) > > kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); > > } > > > > + if (VALID_PAGE(mmu->prev_root.hpa) && > > + pcid == kvm_get_pcid(vcpu, mmu->prev_root.cr3)) > > + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); > > + > > ++vcpu->stat.invlpg; > > > > /* > > * Mappings not reachable via the current cr3 will be synced when > > - * switching to that cr3, so nothing needs to be done here for them. > > + * switching to that cr3, so nothing needs to be synced here for them. > > Hmm, IMO this comment and the similar comment for the single context > case should either be updated to explicitly call out that we also need > to flush the TLB if the PCID matches that of the fast CR3 cache, or > simply removed altogether. I just saw that you do update the comment in PATCH 15/17; maybe part or all of the comment update should be moved to this patch? 
> > */ > > } > > EXPORT_SYMBOL_GPL(kvm_mmu_invpcid_gva); > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > > index a1b75a9645f2..ac4e78c2da38 100644 > > --- a/arch/x86/kvm/vmx.c > > +++ b/arch/x86/kvm/vmx.c > > @@ -8682,9 +8682,13 @@ static int handle_invpcid(struct kvm_vcpu *vcpu) > > kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); > > } > > > > + if (kvm_get_pcid(vcpu, vcpu->arch.mmu.prev_root.cr3) > > + == operand.pcid) > > + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); > > + > > /* > > * If the current cr3 does not use the given PCID, then nothing > > - * needs to be done here because a resync will happen anyway > > + * needs to be synced here because a resync will happen anyway > > * before switching to any other CR3. > > */ > > > > @@ -10652,13 +10656,17 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu) > > > > static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) > > { > > + union kvm_mmu_page_role role; > > + > > WARN_ON(mmu_is_nested(vcpu)); > > if (!valid_ept_address(vcpu, nested_ept_get_cr3(vcpu))) > > return 1; > > > > - kvm_mmu_new_cr3(vcpu, nested_ept_get_cr3(vcpu), > > - kvm_mmu_calc_shadow_ept_root_page_role(vcpu, > > - nested_ept_ad_enabled(vcpu))); > > + role = kvm_mmu_calc_shadow_ept_root_page_role(vcpu, > > + nested_ept_ad_enabled(vcpu)); > > + > > + kvm_mmu_new_cr3(vcpu, nested_ept_get_cr3(vcpu), role, false); > > + > > kvm_init_shadow_ept_mmu(vcpu, > > to_vmx(vcpu)->nested.msrs.ept_caps & > > VMX_EPT_EXECUTE_ONLY_BIT, > > @@ -11216,7 +11224,8 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne > > } > > > > if (!nested_ept) > > - kvm_mmu_new_cr3(vcpu, cr3, kvm_mmu_calc_root_page_role(vcpu)); > > + kvm_mmu_new_cr3(vcpu, cr3, kvm_mmu_calc_root_page_role(vcpu), > > + false); > > > > vcpu->arch.cr3 = cr3; > > __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > index f1b352a2604b..151a8f61c7f1 100644 > > --- 
a/arch/x86/kvm/x86.c > > +++ b/arch/x86/kvm/x86.c > > @@ -847,16 +847,21 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4); > > > > int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) > > { > > + bool skip_tlb_flush = false; > > #ifdef CONFIG_X86_64 > > bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE); > > > > - if (pcid_enabled) > > + if (pcid_enabled) { > > + skip_tlb_flush = cr3 & CR3_PCID_INVD; > > cr3 &= ~CR3_PCID_INVD; > > While you're here, could you remove KVM's CR3_PCID_INVD define and > use the kernel's X86_CR3_PCID_NOFLUSH define? Duplication aside, > CR3_PCID_INVD is a terrible name since it inverts the logic. > > > + } > > #endif > > > > if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) { > > kvm_mmu_sync_roots(vcpu); > > - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); > > + > > + if (!skip_tlb_flush) > > + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); > > return 0; > > } > > > > @@ -867,7 +872,8 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) > > !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) > > return 1; > > > > - kvm_mmu_new_cr3(vcpu, cr3, kvm_mmu_calc_root_page_role(vcpu)); > > + kvm_mmu_new_cr3(vcpu, cr3, kvm_mmu_calc_root_page_role(vcpu), > > + skip_tlb_flush); > > vcpu->arch.cr3 = cr3; > > __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); > > > > -- > > 2.18.0.rc1.242.g61856ae69a-goog > >