On Thu, May 14, 2020 at 04:30:53PM +0800, Like Xu wrote:
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index ea4faae56473..db185dca903d 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -646,6 +646,43 @@ static void intel_pmu_lbr_cleanup(struct kvm_vcpu *vcpu)
>  	intel_pmu_free_lbr_event(vcpu);
>  }
>  
> +static bool intel_pmu_lbr_is_availabile(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> +
> +	if (!pmu->lbr_event)
> +		return false;
> +
> +	if (event_is_oncpu(pmu->lbr_event)) {
> +		intel_pmu_intercept_lbr_msrs(vcpu, false);
> +	} else {
> +		intel_pmu_intercept_lbr_msrs(vcpu, true);
> +		return false;
> +	}
> +
> +	return true;
> +}

This is unreadable gunk, what? (A possible restructuring is sketched at
the end of this mail.)

> +/*
> + * Higher priority host perf events (e.g. cpu pinned) could reclaim the
> + * pmu resources (e.g. LBR) that were assigned to the guest. This is
> + * usually done via ipi calls (more details in perf_install_in_context).
> + *
> + * Before entering the non-root mode (with irq disabled here), double
> + * confirm that the pmu features enabled to the guest are not reclaimed
> + * by higher priority host events. Otherwise, disallow vcpu's access to
> + * the reclaimed features.
> + */
> +static void intel_pmu_availability_check(struct kvm_vcpu *vcpu)
> +{
> +	lockdep_assert_irqs_disabled();
> +
> +	if (lbr_is_enabled(vcpu) && !intel_pmu_lbr_is_availabile(vcpu) &&
> +	    (vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR))
> +		pr_warn_ratelimited("kvm: vcpu-%d: LBR is temporarily unavailable.\n",
> +				    vcpu->vcpu_id);

More unreadable nonsense; when the events go into ERROR state, it's a
permanent failure; they'll not come back, so there is nothing "temporary"
about it. (See the sketch at the end of this mail.)

> +}
> +
>  struct kvm_pmu_ops intel_pmu_ops = {
>  	.find_arch_event = intel_find_arch_event,
>  	.find_fixed_event = intel_find_fixed_event,
> @@ -662,4 +699,5 @@ struct kvm_pmu_ops intel_pmu_ops = {
>  	.reset = intel_pmu_reset,
>  	.deliver_pmi = intel_pmu_deliver_pmi,
>  	.lbr_cleanup = intel_pmu_lbr_cleanup,
> +	.availability_check = intel_pmu_availability_check,
>  };
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 9969d663826a..80d036c5f64a 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -6696,8 +6696,10 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  
>  	pt_guest_enter(vmx);
>  
> -	if (vcpu_to_pmu(vcpu)->version)
> +	if (vcpu_to_pmu(vcpu)->version) {
>  		atomic_switch_perf_msrs(vmx);
> +		kvm_x86_ops.pmu_ops->availability_check(vcpu);
> +	}

AFAICT you just did a call out to the kvm_pmu crud in
atomic_switch_perf_msrs(); why do another call?
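
To make the first two points concrete, below is a minimal, completely
untested sketch of how the availability check could read. It assumes
event_is_oncpu(), intel_pmu_intercept_lbr_msrs() and
intel_pmu_free_lbr_event() from the patch above, uses perf's
PERF_EVENT_STATE_ERROR to treat a dead event as the permanent failure
it is, and fixes the "availabile" spelling while at it:

static bool intel_pmu_lbr_is_available(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	bool oncpu;

	if (!pmu->lbr_event)
		return false;

	/*
	 * ERROR state is permanent; the event will not get the
	 * hardware back on its own. Free it instead of re-checking
	 * it on every VM-entry.
	 */
	if (pmu->lbr_event->state == PERF_EVENT_STATE_ERROR) {
		intel_pmu_intercept_lbr_msrs(vcpu, true);
		intel_pmu_free_lbr_event(vcpu);
		return false;
	}

	/* Pass the LBR MSRs through only while the event has the hardware. */
	oncpu = event_is_oncpu(pmu->lbr_event);
	intel_pmu_intercept_lbr_msrs(vcpu, !oncpu);

	return oncpu;
}

That way the intercept decision is a single expression instead of being
hidden in asymmetric branches.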
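
As for the last point, if the check really has to happen with IRQs
disabled right before VM-entry, one option (again only a sketch; the
existing body of atomic_switch_perf_msrs() is paraphrased from the
current vmx.c, and &vmx->vcpu is assumed to be the right way to get at
the vcpu) would be to fold it into the call-out we already have:

static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
{
	int i, nr_msrs;
	struct perf_guest_switch_msr *msrs;

	msrs = perf_guest_get_msrs(&nr_msrs);
	if (!msrs)
		return;

	for (i = 0; i < nr_msrs; i++)
		if (msrs[i].host == msrs[i].guest)
			clear_atomic_switch_msr(vmx, msrs[i].msr);
		else
			add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest,
					      msrs[i].host, false);

	/*
	 * Sketch: do the LBR availability check from the perf
	 * call-out we already have, so vmx_vcpu_run() keeps a single
	 * call into the pmu code.
	 */
	kvm_x86_ops.pmu_ops->availability_check(&vmx->vcpu);
}

Then vmx_vcpu_run() would not need to change at all.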