Re: [PATCH v2 30/54] KVM: x86/pmu: Implement the save/restore of PMU state for Intel CPU

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, May 14, 2024, Mi, Dapeng wrote:
> 
> On 5/6/2024 1:29 PM, Mingwei Zhang wrote:
> > Implement the save/restore of PMU state for pasthrough PMU in Intel. In
> > passthrough mode, KVM owns exclusively the PMU HW when control flow goes to
> > the scope of passthrough PMU. Thus, KVM needs to save the host PMU state
> > and gains the full HW PMU ownership. On the contrary, host regains the
> > ownership of PMU HW from KVM when control flow leaves the scope of
> > passthrough PMU.
> >
> > Implement PMU context switches for Intel CPUs and opptunistically use
> > rdpmcl() instead of rdmsrl() when reading counters since the former has
> > lower latency in Intel CPUs.
> 
> It looks rdpmcl() optimization is removed from this patch, right? The
> description is not identical with code.

That is correct. I was debugging this for a while and since we don't
have rdpmcl_safe(), one of the bug cause rdpmc() to crash the kernel.
Really don't like rdpmc(). But will add it back in next version.

> 
> 
> >
> > Co-developed-by: Dapeng Mi <dapeng1.mi@xxxxxxxxxxxxxxx>
> > Signed-off-by: Mingwei Zhang <mizhang@xxxxxxxxxx>
> 
> The commit tags doesn't follow the rules in Linux document, we need to
> change it in next version.

Will do.
> 
> https://docs.kernel.org/process/submitting-patches.html#:~:text=Co%2Ddeveloped%2Dby%3A%20states,work%20on%20a%20single%20patch.
> 
> > ---
> >  arch/x86/kvm/pmu.c           | 46 ++++++++++++++++++++++++++++++++++++
> >  arch/x86/kvm/vmx/pmu_intel.c | 41 +++++++++++++++++++++++++++++++-
> >  2 files changed, 86 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
> > index 782b564bdf96..13197472e31d 100644
> > --- a/arch/x86/kvm/pmu.c
> > +++ b/arch/x86/kvm/pmu.c
> > @@ -1068,14 +1068,60 @@ void kvm_pmu_passthrough_pmu_msrs(struct kvm_vcpu *vcpu)
> >  
> >  void kvm_pmu_save_pmu_context(struct kvm_vcpu *vcpu)
> >  {
> > +	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> > +	struct kvm_pmc *pmc;
> > +	u32 i;
> > +
> >  	lockdep_assert_irqs_disabled();
> >  
> >  	static_call_cond(kvm_x86_pmu_save_pmu_context)(vcpu);
> > +
> > +	/*
> > +	 * Clear hardware selector MSR content and its counter to avoid
> > +	 * leakage and also avoid this guest GP counter get accidentally
> > +	 * enabled during host running when host enable global ctrl.
> > +	 */
> > +	for (i = 0; i < pmu->nr_arch_gp_counters; i++) {
> > +		pmc = &pmu->gp_counters[i];
> > +		rdmsrl(pmc->msr_counter, pmc->counter);
> 
> I understood we want to use common code to manipulate PMU MSRs as much as
> possible, but I don't think we should sacrifice performance. rdpmcl() has
> better performance than rdmsrl(). If AMD CPUs doesn't support rdpmc
> instruction, I think we should move this into vendor specific
> xxx_save/restore_pmu_context helpers().
> 
> 
> > +		rdmsrl(pmc->msr_eventsel, pmc->eventsel);
> > +		if (pmc->counter)
> > +			wrmsrl(pmc->msr_counter, 0);
> > +		if (pmc->eventsel)
> > +			wrmsrl(pmc->msr_eventsel, 0);
> > +	}
> > +
> > +	for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
> > +		pmc = &pmu->fixed_counters[i];
> > +		rdmsrl(pmc->msr_counter, pmc->counter);
> > +		if (pmc->counter)
> > +			wrmsrl(pmc->msr_counter, 0);
> > +	}
> >  }
> >  
> >  void kvm_pmu_restore_pmu_context(struct kvm_vcpu *vcpu)
> >  {
> > +	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> > +	struct kvm_pmc *pmc;
> > +	int i;
> > +
> >  	lockdep_assert_irqs_disabled();
> >  
> >  	static_call_cond(kvm_x86_pmu_restore_pmu_context)(vcpu);
> > +
> > +	/*
> > +	 * No need to zero out unexposed GP/fixed counters/selectors since RDPMC
> > +	 * in this case will be intercepted. Accessing to these counters and
> > +	 * selectors will cause #GP in the guest.
> > +	 */
> > +	for (i = 0; i < pmu->nr_arch_gp_counters; i++) {
> > +		pmc = &pmu->gp_counters[i];
> > +		wrmsrl(pmc->msr_counter, pmc->counter);
> > +		wrmsrl(pmc->msr_eventsel, pmu->gp_counters[i].eventsel);
> > +	}
> > +
> > +	for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
> > +		pmc = &pmu->fixed_counters[i];
> > +		wrmsrl(pmc->msr_counter, pmc->counter);
> > +	}
> >  }
> > diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> > index 7852ba25a240..a23cf9ca224e 100644
> > --- a/arch/x86/kvm/vmx/pmu_intel.c
> > +++ b/arch/x86/kvm/vmx/pmu_intel.c
> > @@ -572,7 +572,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
> >  	}
> >  
> >  	for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
> > -		pmu->fixed_counters[i].msr_eventsel = MSR_CORE_PERF_FIXED_CTR_CTRL;
> > +		pmu->fixed_counters[i].msr_eventsel = 0;
> Why to initialize msr_eventsel to 0 instead of the real MSR address here?
> >  		pmu->fixed_counters[i].msr_counter = MSR_CORE_PERF_FIXED_CTR0 + i;
> >  	}
> >  }
> > @@ -799,6 +799,43 @@ static void intel_passthrough_pmu_msrs(struct kvm_vcpu *vcpu)
> >  	vmx_set_intercept_for_msr(vcpu, MSR_CORE_PERF_GLOBAL_OVF_CTRL, MSR_TYPE_RW, msr_intercept);
> >  }
> >  
> > +static void intel_save_guest_pmu_context(struct kvm_vcpu *vcpu)
> > +{
> > +	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> > +
> > +	/* Global ctrl register is already saved at VM-exit. */
> > +	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, pmu->global_status);
> > +	/* Clear hardware MSR_CORE_PERF_GLOBAL_STATUS MSR, if non-zero. */
> > +	if (pmu->global_status)
> > +		wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, pmu->global_status);
> > +
> > +	rdmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, pmu->fixed_ctr_ctrl);
> > +	/*
> > +	 * Clear hardware FIXED_CTR_CTRL MSR to avoid information leakage and
> > +	 * also avoid these guest fixed counters get accidentially enabled
> > +	 * during host running when host enable global ctrl.
> > +	 */
> > +	if (pmu->fixed_ctr_ctrl)
> > +		wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
> > +}
> > +
> > +static void intel_restore_guest_pmu_context(struct kvm_vcpu *vcpu)
> > +{
> > +	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> > +	u64 global_status, toggle;
> > +
> > +	/* Clear host global_ctrl MSR if non-zero. */
> > +	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
> > +	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, global_status);
> > +	toggle = pmu->global_status ^ global_status;
> > +	if (global_status & toggle)
> > +		wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, global_status & toggle);
> > +	if (pmu->global_status & toggle)
> > +		wrmsrl(MSR_CORE_PERF_GLOBAL_STATUS_SET, pmu->global_status & toggle);
> > +
> > +	wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, pmu->fixed_ctr_ctrl);
> > +}
> > +
> >  struct kvm_pmu_ops intel_pmu_ops __initdata = {
> >  	.rdpmc_ecx_to_pmc = intel_rdpmc_ecx_to_pmc,
> >  	.msr_idx_to_pmc = intel_msr_idx_to_pmc,
> > @@ -812,6 +849,8 @@ struct kvm_pmu_ops intel_pmu_ops __initdata = {
> >  	.cleanup = intel_pmu_cleanup,
> >  	.is_rdpmc_passthru_allowed = intel_is_rdpmc_passthru_allowed,
> >  	.passthrough_pmu_msrs = intel_passthrough_pmu_msrs,
> > +	.save_pmu_context = intel_save_guest_pmu_context,
> > +	.restore_pmu_context = intel_restore_guest_pmu_context,
> >  	.EVENTSEL_EVENT = ARCH_PERFMON_EVENTSEL_EVENT,
> >  	.MAX_NR_GP_COUNTERS = KVM_INTEL_PMC_MAX_GENERIC,
> >  	.MIN_NR_GP_COUNTERS = 1,
> 




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux