Re: [PATCH v3 14/37] KVM: x86: Move "flush guest's TLB" logic to separate kvm_x86_ops hook

Vitaly Kuznetsov <vkuznets@xxxxxxxxxx> · Wed, 25 Mar 2020 11:23:41 +0100

Sean Christopherson <sean.j.christopherson@xxxxxxxxx> writes:

> Add a dedicated hook to handle flushing TLB entries on behalf of the
> guest, i.e. for a paravirtualized TLB flush, and use it directly instead
> of bouncing through kvm_vcpu_flush_tlb().
>
> For VMX, change the effective implementation implementation to never do
> INVEPT and flush only the current context, i.e. to always flush via
> INVVPID(SINGLE_CONTEXT).  The INVEPT performed by __vmx_flush_tlb() when
> @invalidate_gpa=false and enable_vpid=0 is unnecessary, as it will only
> flush guest-physical mappings; linear and combined mappings are flushed
> by VM-Enter when VPID is disabled, and changes in the guest pages tables
> do not affect guest-physical mappings.
>
> When EPT and VPID are enabled, doing INVVPID is not required (by Intel's
> architecture) to invalidate guest-physical mappings, i.e. TLB entries
> that cache guest-physical mappings can live across INVVPID as the
> mappings are associated with an EPTP, not a VPID.  The intent of
> @invalidate_gpa is to inform vmx_flush_tlb() that it must "invalidate
> gpa mappings", i.e. do INVEPT and not simply INVVPID.  Other than nested
> VPID handling, which now calls vpid_sync_context() directly, the only
> scenario where KVM can safely do INVVPID instead of INVEPT (when EPT is
> enabled) is if KVM is flushing TLB entries from the guest's perspective,
> i.e. is only required to invalidate linear mappings.
>
> For SVM, flushing TLB entries from the guest's perspective can be done
> by flushing the current ASID, as changes to the guest's page tables are
> associated only with the current ASID.
>
> Adding a dedicated ->tlb_flush_guest() paves the way toward removing
> @invalidate_gpa, which is a potentially dangerous control flag as its
> meaning is not exactly crystal clear, even for those who are familiar
> with the subtleties of what mappings Intel CPUs are/aren't allowed to
> keep across various invalidation scenarios.
>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
> ---
>  arch/x86/include/asm/kvm_host.h |  6 ++++++
>  arch/x86/kvm/svm.c              |  6 ++++++
>  arch/x86/kvm/vmx/vmx.c          | 13 +++++++++++++
>  arch/x86/kvm/x86.c              |  2 +-
>  4 files changed, 26 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index cdbf822c5c8b..c08f4c0bf4d1 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1118,6 +1118,12 @@ struct kvm_x86_ops {
>  	 */
>  	void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
>  
> +	/*
> +	 * Flush any TLB entries created by the guest.  Like tlb_flush_gva(),
> +	 * does not need to flush GPA->HPA mappings.
> +	 */
> +	void (*tlb_flush_guest)(struct kvm_vcpu *vcpu);
> +
>  	void (*run)(struct kvm_vcpu *vcpu);
>  	int (*handle_exit)(struct kvm_vcpu *vcpu,
>  		enum exit_fastpath_completion exit_fastpath);
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 08568ae9f7a1..396f42753489 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -5643,6 +5643,11 @@ static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)
>  	invlpga(gva, svm->vmcb->control.asid);
>  }
>  
> +static void svm_flush_tlb_guest(struct kvm_vcpu *vcpu)
> +{
> +	svm_flush_tlb(vcpu, false);
> +}
> +
>  static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
>  {
>  }
> @@ -7400,6 +7405,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
>  
>  	.tlb_flush = svm_flush_tlb,
>  	.tlb_flush_gva = svm_flush_tlb_gva,
> +	.tlb_flush_guest = svm_flush_tlb_guest,
>  
>  	.run = svm_vcpu_run,
>  	.handle_exit = handle_exit,
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index ba24bbda2c12..57c1cee58d18 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -2862,6 +2862,18 @@ static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr)
>  	 */
>  }
>  
> +static void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu)
> +{
> +	/*
> +	 * vpid_sync_context() is a nop if vmx->vpid==0, e.g. if enable_vpid==0
> +	 * or a vpid couldn't be allocated for this vCPU.  VM-Enter and VM-Exit
> +	 * are required to flush GVA->{G,H}PA mappings from the TLB if vpid is
> +	 * disabled (VM-Enter with vpid enabled and vpid==0 is disallowed),
> +	 * i.e. no explicit INVVPID is necessary.
> +	 */
> +	vpid_sync_context(to_vmx(vcpu)->vpid);
> +}
> +
>  static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
>  {
>  	ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
> @@ -7875,6 +7887,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
>  
>  	.tlb_flush = vmx_flush_tlb,
>  	.tlb_flush_gva = vmx_flush_tlb_gva,
> +	.tlb_flush_guest = vmx_flush_tlb_guest,
>  
>  	.run = vmx_vcpu_run,
>  	.handle_exit = vmx_handle_exit,
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index f506248d61a1..0b90ec2c93cf 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2725,7 +2725,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
>  	trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
>  		st->preempted & KVM_VCPU_FLUSH_TLB);
>  	if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
> -		kvm_vcpu_flush_tlb(vcpu, false);
> +		kvm_x86_ops->tlb_flush_guest(vcpu);
>  
>  	vcpu->arch.st.preempted = 0;

Reviewed-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx>

I *think* I've commented on the previous version that we also have
hyperv-style PV TLB flush and this will likely need to be switched to
tlb_flush_guest(). What do you think about the following (very lightly
tested)?

commit 485b4a579605597b9897b3d9ec118e0f7f1138ad
Author: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx>
Date:   Wed Mar 25 11:14:25 2020 +0100

    KVM: x86: make Hyper-V PV TLB flush use tlb_flush_guest()
    
    Hyper-V PV TLB flush mechanism does TLB flush on behalf of the guest
    so doing tlb_flush_all() is an overkill, switch to using tlb_flush_guest()
    (just like KVM PV TLB flush mechanism) instead. Introduce
    KVM_REQ_HV_TLB_FLUSH to support the change.
    
    Signed-off-by: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx>

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 167729624149..8c5659ed211b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -84,6 +84,7 @@
 #define KVM_REQ_APICV_UPDATE \
 	KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_TLB_FLUSH_CURRENT	KVM_ARCH_REQ(26)
+#define KVM_REQ_HV_TLB_FLUSH		KVM_ARCH_REQ(27)
 
 #define CR0_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index a86fda7a1d03..0d051ed11f38 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1425,8 +1425,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
 	 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
 	 * analyze it here, flush TLB regardless of the specified address space.
 	 */
-	kvm_make_vcpus_request_mask(kvm,
-				    KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP,
+	kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH,
 				    vcpu_mask, &hv_vcpu->tlb_flush);
 
 ret_success:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 210af343eebf..5096a9b1a04e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2702,6 +2702,12 @@ static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->tlb_flush_all(vcpu);
 }
 
+static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
+{
+	++vcpu->stat.tlb_flush;
+	kvm_x86_ops->tlb_flush_guest(vcpu);
+}
+
 static void record_steal_time(struct kvm_vcpu *vcpu)
 {
 	struct kvm_host_map map;
@@ -2725,7 +2731,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 	trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
 		st->preempted & KVM_VCPU_FLUSH_TLB);
 	if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
-		kvm_x86_ops->tlb_flush_guest(vcpu);
+		kvm_vcpu_flush_tlb_guest(vcpu);
 
 	vcpu->arch.st.preempted = 0;
 
@@ -8219,7 +8225,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		}
 		if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
 			kvm_vcpu_flush_tlb_current(vcpu);
-
+		if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu))
+			kvm_vcpu_flush_tlb_guest(vcpu);
 		if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
 			vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
 			r = 0;

-- 
Vitaly