Sean Christopherson <seanjc@xxxxxxxxxx> writes: > On Mon, Mar 20, 2023, Jeremi Piotrowski wrote: >> --- >> arch/x86/kvm/kvm_onhyperv.c | 23 +++++++++++++++++++++++ >> arch/x86/kvm/kvm_onhyperv.h | 5 +++++ >> arch/x86/kvm/svm/svm.c | 18 +++++++++++++++--- >> 3 files changed, 43 insertions(+), 3 deletions(-) >> >> diff --git a/arch/x86/kvm/kvm_onhyperv.c b/arch/x86/kvm/kvm_onhyperv.c >> index 482d6639ef88..036e04c0a161 100644 >> --- a/arch/x86/kvm/kvm_onhyperv.c >> +++ b/arch/x86/kvm/kvm_onhyperv.c >> @@ -94,6 +94,29 @@ int hv_remote_flush_tlb(struct kvm *kvm) >> } >> EXPORT_SYMBOL_GPL(hv_remote_flush_tlb); >> >> +void hv_flush_tlb_current(struct kvm_vcpu *vcpu) >> +{ >> + struct kvm_arch *kvm_arch = &vcpu->kvm->arch; >> + hpa_t root_tdp = vcpu->arch.mmu->root.hpa; >> + >> + if (kvm_x86_ops.tlb_remote_flush == hv_remote_flush_tlb && VALID_PAGE(root_tdp)) { >> + spin_lock(&kvm_arch->hv_root_tdp_lock); >> + if (kvm_arch->hv_root_tdp != root_tdp) { >> + hyperv_flush_guest_mapping(root_tdp); >> + kvm_arch->hv_root_tdp = root_tdp; > > In a vacuum, accessing kvm_arch->hv_root_tdp in the flush path is wrong. This > likely fixes the issues you are seeing because the KVM bug only affects the case > when KVM is loading a new root (that used to be valid), in which case hv_root_tdp > is guaranteed to be different. But KVM should not rely on that behavior, i.e. if > KVM says flush, then we flush. There might be scenarios where the flush is > unnecessary, but those flushes should be elided by the code that knows the flush > is unnecessary, not in this common code just because the target root is the > globally shared root. > > Somewhat of a moot point, but setting hv_root_tdp to root_tdp is also wrong. KVM's > behavior is that hv_root_tdp points at a valid root if and only if all vCPUs share > said root. E.g. invoking this when vCPUs have different roots will "corrupt" > hv_root_tdp and possibly cause a remote flush to do the wrong thing. > >> + } >> + spin_unlock(&kvm_arch->hv_root_tdp_lock); >> + } >> +} >> +EXPORT_SYMBOL_GPL(hv_flush_tlb_current); >> + >> +void hv_flush_tlb_all(struct kvm_vcpu *vcpu) >> +{ >> + if (WARN_ON_ONCE(kvm_x86_ops.tlb_remote_flush == hv_remote_flush_tlb)) > > Hmm, looking at the KVM code, AFAICT KVM only enables enlightened_npt_tlb for L1 > (L1 from KVM's perspective) as svm_hv_init_vmcb() is only ever called with vmcb01, > never with vmcb02. I don't know if that's intentional, but I do think it means > KVM can skip the Hyper-V flush for vmcb02 and instead rely on the ASID flush, > i.e. KVM can do the Hyper-V iff enlightened_npt_tlb is set in the current VMCB. > And that should continue to work if KVM does ever enabled enlightened_npt_tlb for L2. > >> + hv_remote_flush_tlb(vcpu->kvm); >> +} >> +EXPORT_SYMBOL_GPL(hv_flush_tlb_all); > > I'd rather not add helpers to the common KVM code. I do like minimizing the amount > of #ifdeffery, but defining these as common helpers makes it seem like VMX-on-HyperV > is broken, i.e. raises the question of why VMX doesn't use these helpers when running > on Hyper-V. > > I'm thinking this? > > --- > arch/x86/kvm/svm/svm.c | 39 ++++++++++++++++++++++++++++++--- > arch/x86/kvm/svm/svm_onhyperv.h | 7 ++++++ > 2 files changed, 43 insertions(+), 3 deletions(-) > > diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c > index 70183d2271b5..ab97fe8f1d81 100644 > --- a/arch/x86/kvm/svm/svm.c > +++ b/arch/x86/kvm/svm/svm.c > @@ -3746,7 +3746,7 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu) > svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); > } > > -static void svm_flush_tlb_current(struct kvm_vcpu *vcpu) > +static void svm_flush_tlb_asid(struct kvm_vcpu *vcpu) > { > struct vcpu_svm *svm = to_svm(vcpu); > > @@ -3770,6 +3770,39 @@ static void svm_flush_tlb_current(struct kvm_vcpu *vcpu) > svm->current_vmcb->asid_generation--; > } > > +static void svm_flush_tlb_current(struct kvm_vcpu *vcpu) > +{ > +#if IS_ENABLED(CONFIG_HYPERV) > + hpa_t root_tdp = vcpu->arch.mmu->root.hpa; > + > + /* > + * When running on Hyper-V with EnlightenedNptTlb enabled, explicitly > + * flush the NPT mappings via hypercall as flushing the ASID only > + * affects virtual to physical mappings, it does not invalidate guest > + * physical to host physical mappings. > + */ > + if (svm_hv_is_enlightened_tlb_enabled(vcpu) && VALID_PAGE(root_tdp)) > + hyperv_flush_guest_mapping(root_tdp); > +#endif > + svm_flush_tlb_asid(vcpu); > +} > + > +static void svm_flush_tlb_all(struct kvm_vcpu *vcpu) > +{ > +#if IS_ENABLED(CONFIG_HYPERV) > + /* > + * When running on Hyper-V with EnlightenedNptTlb enabled, remote TLB > + * flushes should be routed to hv_remote_flush_tlb() without requesting > + * a "regular" remote flush. Reaching this point means either there's > + * a KVM bug or a prior hv_remote_flush_tlb() call failed, both of > + * which might be fatal to the the guest. Yell, but try to recover. > + */ > + if (WARN_ON_ONCE(svm_hv_is_enlightened_tlb_enabled(vcpu))) > + hv_remote_flush_tlb(vcpu->kvm); > +#endif > + svm_flush_tlb_asid(vcpu); > +} > + > static void svm_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t gva) > { > struct vcpu_svm *svm = to_svm(vcpu); > @@ -4762,10 +4795,10 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { > .set_rflags = svm_set_rflags, > .get_if_flag = svm_get_if_flag, > > - .flush_tlb_all = svm_flush_tlb_current, > + .flush_tlb_all = svm_flush_tlb_all, > .flush_tlb_current = svm_flush_tlb_current, > .flush_tlb_gva = svm_flush_tlb_gva, > - .flush_tlb_guest = svm_flush_tlb_current, > + .flush_tlb_guest = svm_flush_tlb_asid, > > .vcpu_pre_run = svm_vcpu_pre_run, > .vcpu_run = svm_vcpu_run, > diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h > index cff838f15db5..d91e019fb7da 100644 > --- a/arch/x86/kvm/svm/svm_onhyperv.h > +++ b/arch/x86/kvm/svm/svm_onhyperv.h > @@ -15,6 +15,13 @@ static struct kvm_x86_ops svm_x86_ops; > > int svm_hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu); > > +static inline bool svm_hv_is_enlightened_tlb_enabled(struct kvm_vcpu *vcpu) > +{ > + struct hv_vmcb_enlightenments *hve = &to_svm(vcpu)->vmcb->control.hv_enlightenments; > + > + return !!hve->hv_enlightenments_control.enlightened_npt_tlb; In theory, we should not look at Hyper-V enlightenments in VMCB control just because our kernel has CONFIG_HYPERV enabled. I'd suggest we add a real check that we're running on Hyper-V and we can do it the same way it is done in svm_hv_hardware_setup()/svm_hv_init_vmcb(): return (ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB) && !!hve->hv_enlightenments_control.enlightened_npt_tlb; (untested). > +} > + > static inline void svm_hv_init_vmcb(struct vmcb *vmcb) > { > struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments; > > base-commit: 50f13998451effea5c5fdc70fe576f8b435d6224 -- Vitaly