Re: [PATCH] KVM: SVM: Flush Hyper-V TLB when required

Vitaly Kuznetsov <vkuznets@xxxxxxxxxx> · Wed, 22 Mar 2023 17:53:34 +0100

Sean Christopherson <seanjc@xxxxxxxxxx> writes:

> On Mon, Mar 20, 2023, Jeremi Piotrowski wrote:
>> ---
>>  arch/x86/kvm/kvm_onhyperv.c | 23 +++++++++++++++++++++++
>>  arch/x86/kvm/kvm_onhyperv.h |  5 +++++
>>  arch/x86/kvm/svm/svm.c      | 18 +++++++++++++++---
>>  3 files changed, 43 insertions(+), 3 deletions(-)
>> 
>> diff --git a/arch/x86/kvm/kvm_onhyperv.c b/arch/x86/kvm/kvm_onhyperv.c
>> index 482d6639ef88..036e04c0a161 100644
>> --- a/arch/x86/kvm/kvm_onhyperv.c
>> +++ b/arch/x86/kvm/kvm_onhyperv.c
>> @@ -94,6 +94,29 @@ int hv_remote_flush_tlb(struct kvm *kvm)
>>  }
>>  EXPORT_SYMBOL_GPL(hv_remote_flush_tlb);
>>  
>> +void hv_flush_tlb_current(struct kvm_vcpu *vcpu)
>> +{
>> +	struct kvm_arch *kvm_arch = &vcpu->kvm->arch;
>> +	hpa_t root_tdp = vcpu->arch.mmu->root.hpa;
>> +
>> +	if (kvm_x86_ops.tlb_remote_flush == hv_remote_flush_tlb && VALID_PAGE(root_tdp)) {
>> +		spin_lock(&kvm_arch->hv_root_tdp_lock);
>> +		if (kvm_arch->hv_root_tdp != root_tdp) {
>> +			hyperv_flush_guest_mapping(root_tdp);
>> +			kvm_arch->hv_root_tdp = root_tdp;
>
> In a vacuum, accessing kvm_arch->hv_root_tdp in the flush path is wrong.  This
> likely fixes the issues you are seeing because the KVM bug only affects the case
> when KVM is loading a new root (that used to be valid), in which case hv_root_tdp
> is guaranteed to be different.  But KVM should not rely on that behavior, i.e. if
> KVM says flush, then we flush.  There might be scenarios where the flush is
> unnecessary, but those flushes should be elided by the code that knows the flush
> is unnecessary, not in this common code just because the target root is the
> globally shared root.
>
> Somewhat of a moot point, but setting hv_root_tdp to root_tdp is also wrong.  KVM's
> behavior is that hv_root_tdp points at a valid root if and only if all vCPUs share
> said root.  E.g. invoking this when vCPUs have different roots will "corrupt"
> hv_root_tdp and possibly cause a remote flush to do the wrong thing.
>
>> +		}
>> +		spin_unlock(&kvm_arch->hv_root_tdp_lock);
>> +	}
>> +}
>> +EXPORT_SYMBOL_GPL(hv_flush_tlb_current);
>> +
>> +void hv_flush_tlb_all(struct kvm_vcpu *vcpu)
>> +{
>> +	if (WARN_ON_ONCE(kvm_x86_ops.tlb_remote_flush == hv_remote_flush_tlb))
>
> Hmm, looking at the KVM code, AFAICT KVM only enables enlightened_npt_tlb for L1
> (L1 from KVM's perspective) as svm_hv_init_vmcb() is only ever called with vmcb01,
> never with vmcb02.  I don't know if that's intentional, but I do think it means
> KVM can skip the Hyper-V flush for vmcb02 and instead rely on the ASID flush,
> i.e. KVM can do the Hyper-V iff enlightened_npt_tlb is set in the current VMCB.
> And that should continue to work if KVM does ever enabled enlightened_npt_tlb for L2.
>
>> +		hv_remote_flush_tlb(vcpu->kvm);
>> +}
>> +EXPORT_SYMBOL_GPL(hv_flush_tlb_all);
>
> I'd rather not add helpers to the common KVM code.  I do like minimizing the amount
> of #ifdeffery, but defining these as common helpers makes it seem like VMX-on-HyperV
> is broken, i.e. raises the question of why VMX doesn't use these helpers when running
> on Hyper-V.
>
> I'm thinking this?
>
> ---
>  arch/x86/kvm/svm/svm.c          | 39 ++++++++++++++++++++++++++++++---
>  arch/x86/kvm/svm/svm_onhyperv.h |  7 ++++++
>  2 files changed, 43 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 70183d2271b5..ab97fe8f1d81 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -3746,7 +3746,7 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
>  	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
>  }
>  
> -static void svm_flush_tlb_current(struct kvm_vcpu *vcpu)
> +static void svm_flush_tlb_asid(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
>  
> @@ -3770,6 +3770,39 @@ static void svm_flush_tlb_current(struct kvm_vcpu *vcpu)
>  		svm->current_vmcb->asid_generation--;
>  }
>  
> +static void svm_flush_tlb_current(struct kvm_vcpu *vcpu)
> +{
> +#if IS_ENABLED(CONFIG_HYPERV)
> +	hpa_t root_tdp = vcpu->arch.mmu->root.hpa;
> +
> +	/*
> +	 * When running on Hyper-V with EnlightenedNptTlb enabled, explicitly
> +	 * flush the NPT mappings via hypercall as flushing the ASID only
> +	 * affects virtual to physical mappings, it does not invalidate guest
> +	 * physical to host physical mappings.
> +	 */
> +	if (svm_hv_is_enlightened_tlb_enabled(vcpu) && VALID_PAGE(root_tdp))
> +		hyperv_flush_guest_mapping(root_tdp);
> +#endif
> +	svm_flush_tlb_asid(vcpu);
> +}
> +
> +static void svm_flush_tlb_all(struct kvm_vcpu *vcpu)
> +{
> +#if IS_ENABLED(CONFIG_HYPERV)
> +	/*
> +	 * When running on Hyper-V with EnlightenedNptTlb enabled, remote TLB
> +	 * flushes should be routed to hv_remote_flush_tlb() without requesting
> +	 * a "regular" remote flush.  Reaching this point means either there's
> +	 * a KVM bug or a prior hv_remote_flush_tlb() call failed, both of
> +	 * which might be fatal to the the guest.  Yell, but try to recover.
> +	 */
> +	if (WARN_ON_ONCE(svm_hv_is_enlightened_tlb_enabled(vcpu)))
> +		hv_remote_flush_tlb(vcpu->kvm);
> +#endif
> +	svm_flush_tlb_asid(vcpu);
> +}
> +
>  static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
> @@ -4762,10 +4795,10 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
>  	.set_rflags = svm_set_rflags,
>  	.get_if_flag = svm_get_if_flag,
>  
> -	.flush_tlb_all = svm_flush_tlb_current,
> +	.flush_tlb_all = svm_flush_tlb_all,
>  	.flush_tlb_current = svm_flush_tlb_current,
>  	.flush_tlb_gva = svm_flush_tlb_gva,
> -	.flush_tlb_guest = svm_flush_tlb_current,
> +	.flush_tlb_guest = svm_flush_tlb_asid,
>  
>  	.vcpu_pre_run = svm_vcpu_pre_run,
>  	.vcpu_run = svm_vcpu_run,
> diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h
> index cff838f15db5..d91e019fb7da 100644
> --- a/arch/x86/kvm/svm/svm_onhyperv.h
> +++ b/arch/x86/kvm/svm/svm_onhyperv.h
> @@ -15,6 +15,13 @@ static struct kvm_x86_ops svm_x86_ops;
>  
>  int svm_hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu);
>  
> +static inline bool svm_hv_is_enlightened_tlb_enabled(struct kvm_vcpu *vcpu)
> +{
> +	struct hv_vmcb_enlightenments *hve = &to_svm(vcpu)->vmcb->control.hv_enlightenments;
> +
> +	return !!hve->hv_enlightenments_control.enlightened_npt_tlb;

In theory, we should not look at Hyper-V enlightenments in VMCB control
just because our kernel has CONFIG_HYPERV enabled. I'd suggest we add a
real check that we're running on Hyper-V and we can do it the same way
it is done in svm_hv_hardware_setup()/svm_hv_init_vmcb():

	return (ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB)
		&& !!hve->hv_enlightenments_control.enlightened_npt_tlb;

(untested).

> +}
> +
>  static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
>  {
>  	struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments;
>
> base-commit: 50f13998451effea5c5fdc70fe576f8b435d6224

-- 
Vitaly