Sean Christopherson <sean.j.christopherson@xxxxxxxxx> writes: > Add a dedicated hook to handle flushing TLB entries on behalf of the > guest, i.e. for a paravirtualized TLB flush, and use it directly instead > of bouncing through kvm_vcpu_flush_tlb(). > > For VMX, change the effective implementation to never do > INVEPT and flush only the current context, i.e. to always flush via > INVVPID(SINGLE_CONTEXT). The INVEPT performed by __vmx_flush_tlb() when > @invalidate_gpa=false and enable_vpid=0 is unnecessary, as it will only > flush guest-physical mappings; linear and combined mappings are flushed > by VM-Enter when VPID is disabled, and changes in the guest page tables > do not affect guest-physical mappings. > > When EPT and VPID are enabled, doing INVVPID is not required (by Intel's > architecture) to invalidate guest-physical mappings, i.e. TLB entries > that cache guest-physical mappings can live across INVVPID as the > mappings are associated with an EPTP, not a VPID. The intent of > @invalidate_gpa is to inform vmx_flush_tlb() that it must "invalidate > gpa mappings", i.e. do INVEPT and not simply INVVPID. Other than nested > VPID handling, which now calls vpid_sync_context() directly, the only > scenario where KVM can safely do INVVPID instead of INVEPT (when EPT is > enabled) is if KVM is flushing TLB entries from the guest's perspective, > i.e. is only required to invalidate linear mappings. > > For SVM, flushing TLB entries from the guest's perspective can be done > by flushing the current ASID, as changes to the guest's page tables are > associated only with the current ASID. > > Adding a dedicated ->tlb_flush_guest() paves the way toward removing > @invalidate_gpa, which is a potentially dangerous control flag as its > meaning is not exactly crystal clear, even for those who are familiar > with the subtleties of what mappings Intel CPUs are/aren't allowed to > keep across various invalidation scenarios. 
> > Signed-off-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx> > --- > arch/x86/include/asm/kvm_host.h | 6 ++++++ > arch/x86/kvm/svm.c | 6 ++++++ > arch/x86/kvm/vmx/vmx.c | 13 +++++++++++++ > arch/x86/kvm/x86.c | 2 +- > 4 files changed, 26 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index cdbf822c5c8b..c08f4c0bf4d1 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -1118,6 +1118,12 @@ struct kvm_x86_ops { > */ > void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr); > > + /* > + * Flush any TLB entries created by the guest. Like tlb_flush_gva(), > + * does not need to flush GPA->HPA mappings. > + */ > + void (*tlb_flush_guest)(struct kvm_vcpu *vcpu); > + > void (*run)(struct kvm_vcpu *vcpu); > int (*handle_exit)(struct kvm_vcpu *vcpu, > enum exit_fastpath_completion exit_fastpath); > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > index 08568ae9f7a1..396f42753489 100644 > --- a/arch/x86/kvm/svm.c > +++ b/arch/x86/kvm/svm.c > @@ -5643,6 +5643,11 @@ static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva) > invlpga(gva, svm->vmcb->control.asid); > } > > +static void svm_flush_tlb_guest(struct kvm_vcpu *vcpu) > +{ > + svm_flush_tlb(vcpu, false); > +} > + > static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) > { > } > @@ -7400,6 +7405,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { > > .tlb_flush = svm_flush_tlb, > .tlb_flush_gva = svm_flush_tlb_gva, > + .tlb_flush_guest = svm_flush_tlb_guest, > > .run = svm_vcpu_run, > .handle_exit = handle_exit, > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > index ba24bbda2c12..57c1cee58d18 100644 > --- a/arch/x86/kvm/vmx/vmx.c > +++ b/arch/x86/kvm/vmx/vmx.c > @@ -2862,6 +2862,18 @@ static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr) > */ > } > > +static void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu) > +{ > + /* > + * vpid_sync_context() is a 
nop if vmx->vpid==0, e.g. if enable_vpid==0 > + * or a vpid couldn't be allocated for this vCPU. VM-Enter and VM-Exit > + * are required to flush GVA->{G,H}PA mappings from the TLB if vpid is > + * disabled (VM-Enter with vpid enabled and vpid==0 is disallowed), > + * i.e. no explicit INVVPID is necessary. > + */ > + vpid_sync_context(to_vmx(vcpu)->vpid); > +} > + > static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) > { > ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; > @@ -7875,6 +7887,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { > > .tlb_flush = vmx_flush_tlb, > .tlb_flush_gva = vmx_flush_tlb_gva, > + .tlb_flush_guest = vmx_flush_tlb_guest, > > .run = vmx_vcpu_run, > .handle_exit = vmx_handle_exit, > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index f506248d61a1..0b90ec2c93cf 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -2725,7 +2725,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu) > trace_kvm_pv_tlb_flush(vcpu->vcpu_id, > st->preempted & KVM_VCPU_FLUSH_TLB); > if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB) > - kvm_vcpu_flush_tlb(vcpu, false); > + kvm_x86_ops->tlb_flush_guest(vcpu); > > vcpu->arch.st.preempted = 0; Reviewed-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx> I *think* I commented on the previous version that we also have a Hyper-V-style PV TLB flush, and that it will likely need to be switched to tlb_flush_guest() as well. What do you think about the following (very lightly tested)? commit 485b4a579605597b9897b3d9ec118e0f7f1138ad Author: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx> Date: Wed Mar 25 11:14:25 2020 +0100 KVM: x86: make Hyper-V PV TLB flush use tlb_flush_guest() The Hyper-V PV TLB flush mechanism does a TLB flush on behalf of the guest, so doing tlb_flush_all() is overkill; switch to using tlb_flush_guest() (just like the KVM PV TLB flush mechanism) instead. Introduce KVM_REQ_HV_TLB_FLUSH to support the change. 
Signed-off-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 167729624149..8c5659ed211b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -84,6 +84,7 @@ #define KVM_REQ_APICV_UPDATE \ KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26) +#define KVM_REQ_HV_TLB_FLUSH KVM_ARCH_REQ(27) #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index a86fda7a1d03..0d051ed11f38 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1425,8 +1425,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa, * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't * analyze it here, flush TLB regardless of the specified address space. */ - kvm_make_vcpus_request_mask(kvm, - KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP, + kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH, vcpu_mask, &hv_vcpu->tlb_flush); ret_success: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 210af343eebf..5096a9b1a04e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2702,6 +2702,12 @@ static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu) kvm_x86_ops->tlb_flush_all(vcpu); } +static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu) +{ + ++vcpu->stat.tlb_flush; + kvm_x86_ops->tlb_flush_guest(vcpu); +} + static void record_steal_time(struct kvm_vcpu *vcpu) { struct kvm_host_map map; @@ -2725,7 +2731,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu) trace_kvm_pv_tlb_flush(vcpu->vcpu_id, st->preempted & KVM_VCPU_FLUSH_TLB); if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB) - kvm_x86_ops->tlb_flush_guest(vcpu); + kvm_vcpu_flush_tlb_guest(vcpu); vcpu->arch.st.preempted = 0; @@ -8219,7 +8225,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } if 
(kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) kvm_vcpu_flush_tlb_current(vcpu); - + if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu)) + kvm_vcpu_flush_tlb_guest(vcpu); if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; r = 0; -- Vitaly