Re: [PATCH v2 30/54] KVM: x86/pmu: Implement the save/restore of PMU state for Intel CPU

On 5/6/2024 1:29 PM, Mingwei Zhang wrote:
> Implement the save/restore of PMU state for passthrough PMU in Intel. In
> passthrough mode, KVM owns exclusively the PMU HW when control flow goes to
> the scope of passthrough PMU. Thus, KVM needs to save the host PMU state
> and gains the full HW PMU ownership. On the contrary, host regains the
> ownership of PMU HW from KVM when control flow leaves the scope of
> passthrough PMU.
>
> Implement PMU context switches for Intel CPUs and opportunistically use
> rdpmcl() instead of rdmsrl() when reading counters since the former has
> lower latency in Intel CPUs.

It looks like the rdpmcl() optimization has been removed from this patch,
right? The description doesn't match the code.


>
> Co-developed-by: Dapeng Mi <dapeng1.mi@xxxxxxxxxxxxxxx>
> Signed-off-by: Mingwei Zhang <mizhang@xxxxxxxxxx>

The commit tags don't follow the rules in the Linux documentation; we need
to change them in the next version.

https://docs.kernel.org/process/submitting-patches.html#:~:text=Co%2Ddeveloped%2Dby%3A%20states,work%20on%20a%20single%20patch.
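
For reference, the documentation expects every Co-developed-by: to be
immediately followed by the co-author's Signed-off-by:, with the submitter's
Signed-off-by: coming last, i.e. something like:

    Co-developed-by: Dapeng Mi <dapeng1.mi@xxxxxxxxxxxxxxx>
    Signed-off-by: Dapeng Mi <dapeng1.mi@xxxxxxxxxxxxxxx>
    Signed-off-by: Mingwei Zhang <mizhang@xxxxxxxxxx>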

> ---
>  arch/x86/kvm/pmu.c           | 46 ++++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/vmx/pmu_intel.c | 41 +++++++++++++++++++++++++++++++-
>  2 files changed, 86 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
> index 782b564bdf96..13197472e31d 100644
> --- a/arch/x86/kvm/pmu.c
> +++ b/arch/x86/kvm/pmu.c
> @@ -1068,14 +1068,60 @@ void kvm_pmu_passthrough_pmu_msrs(struct kvm_vcpu *vcpu)
>  
>  void kvm_pmu_save_pmu_context(struct kvm_vcpu *vcpu)
>  {
> +	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> +	struct kvm_pmc *pmc;
> +	u32 i;
> +
>  	lockdep_assert_irqs_disabled();
>  
>  	static_call_cond(kvm_x86_pmu_save_pmu_context)(vcpu);
> +
> +	/*
> +	 * Clear hardware selector MSR content and its counter to avoid
> +	 * leakage and also avoid this guest GP counter get accidentally
> +	 * enabled during host running when host enable global ctrl.
> +	 */
> +	for (i = 0; i < pmu->nr_arch_gp_counters; i++) {
> +		pmc = &pmu->gp_counters[i];
> +		rdmsrl(pmc->msr_counter, pmc->counter);

I understand we want to use common code to manipulate PMU MSRs as much as
possible, but I don't think we should sacrifice performance. rdpmcl() has
better performance than rdmsrl(). If AMD CPUs don't support the rdpmc
instruction, I think we should move this into the vendor-specific
xxx_save/restore_pmu_context() helpers, e.g. something like the sketch
below.
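
A rough sketch of what the read side could look like in an Intel-specific
helper, assuming the rdpmcl() macro and INTEL_PMC_FIXED_RDPMC_BASE from
<asm/perf_event.h> are usable here (the helper name and structure are just
illustrative, not the patch's actual code):

static void intel_save_guest_pmu_counters(struct kvm_vcpu *vcpu)
{
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	u32 i;

	for (i = 0; i < pmu->nr_arch_gp_counters; i++) {
		pmc = &pmu->gp_counters[i];
		/* The RDPMC index of a GP counter is the counter index itself. */
		rdpmcl(i, pmc->counter);
		rdmsrl(pmc->msr_eventsel, pmc->eventsel);
		if (pmc->counter)
			wrmsrl(pmc->msr_counter, 0);
		if (pmc->eventsel)
			wrmsrl(pmc->msr_eventsel, 0);
	}

	for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
		pmc = &pmu->fixed_counters[i];
		/* Fixed counters are read via RDPMC with bit 30 set in the index. */
		rdpmcl(INTEL_PMC_FIXED_RDPMC_BASE | i, pmc->counter);
		if (pmc->counter)
			wrmsrl(pmc->msr_counter, 0);
	}
}

Only the reads can be converted; clearing and restoring the counters still
has to go through wrmsrl(), since RDPMC has no write counterpart.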


> +		rdmsrl(pmc->msr_eventsel, pmc->eventsel);
> +		if (pmc->counter)
> +			wrmsrl(pmc->msr_counter, 0);
> +		if (pmc->eventsel)
> +			wrmsrl(pmc->msr_eventsel, 0);
> +	}
> +
> +	for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
> +		pmc = &pmu->fixed_counters[i];
> +		rdmsrl(pmc->msr_counter, pmc->counter);
> +		if (pmc->counter)
> +			wrmsrl(pmc->msr_counter, 0);
> +	}
>  }
>  
>  void kvm_pmu_restore_pmu_context(struct kvm_vcpu *vcpu)
>  {
> +	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> +	struct kvm_pmc *pmc;
> +	int i;
> +
>  	lockdep_assert_irqs_disabled();
>  
>  	static_call_cond(kvm_x86_pmu_restore_pmu_context)(vcpu);
> +
> +	/*
> +	 * No need to zero out unexposed GP/fixed counters/selectors since RDPMC
> +	 * in this case will be intercepted. Accessing to these counters and
> +	 * selectors will cause #GP in the guest.
> +	 */
> +	for (i = 0; i < pmu->nr_arch_gp_counters; i++) {
> +		pmc = &pmu->gp_counters[i];
> +		wrmsrl(pmc->msr_counter, pmc->counter);
> +		wrmsrl(pmc->msr_eventsel, pmu->gp_counters[i].eventsel);
> +	}
> +
> +	for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
> +		pmc = &pmu->fixed_counters[i];
> +		wrmsrl(pmc->msr_counter, pmc->counter);
> +	}
>  }
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index 7852ba25a240..a23cf9ca224e 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -572,7 +572,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
>  	}
>  
>  	for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
> -		pmu->fixed_counters[i].msr_eventsel = MSR_CORE_PERF_FIXED_CTR_CTRL;
> +		pmu->fixed_counters[i].msr_eventsel = 0;

Why initialize msr_eventsel to 0 instead of the real MSR address here?

>  		pmu->fixed_counters[i].msr_counter = MSR_CORE_PERF_FIXED_CTR0 + i;
>  	}
>  }
> @@ -799,6 +799,43 @@ static void intel_passthrough_pmu_msrs(struct kvm_vcpu *vcpu)
>  	vmx_set_intercept_for_msr(vcpu, MSR_CORE_PERF_GLOBAL_OVF_CTRL, MSR_TYPE_RW, msr_intercept);
>  }
>  
> +static void intel_save_guest_pmu_context(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> +
> +	/* Global ctrl register is already saved at VM-exit. */
> +	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, pmu->global_status);
> +	/* Clear hardware MSR_CORE_PERF_GLOBAL_STATUS MSR, if non-zero. */
> +	if (pmu->global_status)
> +		wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, pmu->global_status);
> +
> +	rdmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, pmu->fixed_ctr_ctrl);
> +	/*
> +	 * Clear hardware FIXED_CTR_CTRL MSR to avoid information leakage and
> +	 * also avoid these guest fixed counters get accidentially enabled
> +	 * during host running when host enable global ctrl.
> +	 */
> +	if (pmu->fixed_ctr_ctrl)
> +		wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
> +}
> +
> +static void intel_restore_guest_pmu_context(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> +	u64 global_status, toggle;
> +
> +	/* Clear host global_ctrl MSR if non-zero. */
> +	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
> +	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, global_status);
> +	toggle = pmu->global_status ^ global_status;
> +	if (global_status & toggle)
> +		wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, global_status & toggle);
> +	if (pmu->global_status & toggle)
> +		wrmsrl(MSR_CORE_PERF_GLOBAL_STATUS_SET, pmu->global_status & toggle);
> +
> +	wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, pmu->fixed_ctr_ctrl);
> +}
> +
>  struct kvm_pmu_ops intel_pmu_ops __initdata = {
>  	.rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc,
>  	.msr_idx_to_pmc = intel_msr_idx_to_pmc,
> @@ -812,6 +849,8 @@ struct kvm_pmu_ops intel_pmu_ops __initdata = {
>  	.cleanup = intel_pmu_cleanup,
>  	.is_rdpmc_passthru_allowed = intel_is_rdpmc_passthru_allowed,
>  	.passthrough_pmu_msrs = intel_passthrough_pmu_msrs,
> +	.save_pmu_context = intel_save_guest_pmu_context,
> +	.restore_pmu_context = intel_restore_guest_pmu_context,
>  	.EVENTSEL_EVENT = ARCH_PERFMON_EVENTSEL_EVENT,
>  	.MAX_NR_GP_COUNTERS = KVM_INTEL_PMC_MAX_GENERIC,
>  	.MIN_NR_GP_COUNTERS = 1,



