Re: [PATCH v11 10/11] KVM: x86/pmu: Check guest LBR availability in case host reclaims them

Peter Zijlstra <peterz@xxxxxxxxxxxxx> · Tue, 19 May 2020 13:15:59 +0200

On Thu, May 14, 2020 at 04:30:53PM +0800, Like Xu wrote:

> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index ea4faae56473..db185dca903d 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -646,6 +646,43 @@ static void intel_pmu_lbr_cleanup(struct kvm_vcpu *vcpu)
>  		intel_pmu_free_lbr_event(vcpu);
>  }
>  
> +static bool intel_pmu_lbr_is_availabile(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> +
> +	if (!pmu->lbr_event)
> +		return false;
> +
> +	if (event_is_oncpu(pmu->lbr_event)) {
> +		intel_pmu_intercept_lbr_msrs(vcpu, false);
> +	} else {
> +		intel_pmu_intercept_lbr_msrs(vcpu, true);
> +		return false;
> +	}
> +
> +	return true;
> +}

This is unreadable gunk, what?

> +/*
> + * Higher priority host perf events (e.g. cpu pinned) could reclaim the
> + * pmu resources (e.g. LBR) that were assigned to the guest. This is
> + * usually done via ipi calls (more details in perf_install_in_context).
> + *
> + * Before entering the non-root mode (with irq disabled here), double
> + * confirm that the pmu features enabled to the guest are not reclaimed
> + * by higher priority host events. Otherwise, disallow vcpu's access to
> + * the reclaimed features.
> + */
> +static void intel_pmu_availability_check(struct kvm_vcpu *vcpu)
> +{
> +	lockdep_assert_irqs_disabled();
> +
> +	if (lbr_is_enabled(vcpu) && !intel_pmu_lbr_is_availabile(vcpu) &&
> +		(vmcs_read64(GUEST_IA32_DEBUGCTL) & DEBUGCTLMSR_LBR))
> +		pr_warn_ratelimited("kvm: vcpu-%d: LBR is temporarily unavailable.\n",
> +			vcpu->vcpu_id);

More unreadable nonsense; when the events go into ERROR state, it's a
permanent fail, they'll not come back.

> +}
> +
>  struct kvm_pmu_ops intel_pmu_ops = {
>  	.find_arch_event = intel_find_arch_event,
>  	.find_fixed_event = intel_find_fixed_event,
> @@ -662,4 +699,5 @@ struct kvm_pmu_ops intel_pmu_ops = {
>  	.reset = intel_pmu_reset,
>  	.deliver_pmi = intel_pmu_deliver_pmi,
>  	.lbr_cleanup = intel_pmu_lbr_cleanup,
> +	.availability_check = intel_pmu_availability_check,
>  };
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 9969d663826a..80d036c5f64a 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -6696,8 +6696,10 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  
>  	pt_guest_enter(vmx);
>  
> -	if (vcpu_to_pmu(vcpu)->version)
> +	if (vcpu_to_pmu(vcpu)->version) {
>  		atomic_switch_perf_msrs(vmx);
> +		kvm_x86_ops.pmu_ops->availability_check(vcpu);
> +	}

AFAICT you just did a call out to the kvm_pmu crud in
atomic_switch_perf_msrs(), why do another call?