This patch dispatches the generic vPMU requests to the Intel-specific
pmu_ops. In the vCPU scheduling context, it saves and restores the
states of the in-use assigned counters without interference.
intel_pmu_sched_in() releases the perf event once its hw_life_count has
been counted down to zero; if a vPMC is disabled, it is considered no
longer in use and its hw_life_count is decreased by one.

Signed-off-by: Wang Wei <wei.w.wang@xxxxxxxxx>
Signed-off-by: Like Xu <like.xu@xxxxxxxxxxxxxxx>
---
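Note for reviewers: below is a minimal sketch of the lazy-release policy
that intel_pmu_sched_in() applies to each general-purpose counter. It is
only an illustration, not part of the patch: the helper name
sched_in_one_gp_counter() is hypothetical, the sketch ignores the case
where a GP vPMC is backed by a fixed hardware counter, and helpers such
as intel_pmc_stop_counter(), intel_pmc_is_assigned() and
intel_pmu_restore_guest_pmc() come from earlier patches in this series.

static void sched_in_one_gp_counter(struct kvm_pmu *pmu, struct kvm_pmc *pmc)
{
	/*
	 * The grace period has expired: release the assigned hardware
	 * counter and its backing perf event.
	 */
	if (pmc->perf_event && pmc->hw_life_count == 0)
		intel_pmc_stop_counter(pmc);

	/* Nothing to restore if no hardware counter is assigned. */
	if (!intel_pmc_is_assigned(pmc))
		return;

	/* Reload the guest counter state saved by intel_pmu_sched_out(). */
	intel_pmu_restore_guest_pmc(pmu, pmc->idx);

	/* A disabled vPMC counts down toward its eventual release. */
	if (!(pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE))
		pmc->hw_life_count--;
}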
 arch/x86/kvm/pmu.c           | 15 ++++++++
 arch/x86/kvm/pmu.h           | 22 ++++++++++++
 arch/x86/kvm/vmx/pmu_intel.c | 81 ++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c           |  6 ++++
 4 files changed, 124 insertions(+)

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 58ead7d..672e268 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -284,6 +284,9 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
 	struct kvm_pmc *pmc;
 	u64 ctr_val;
 
+	if (kvm_x86_ops->pmu_ops->pmc_read_counter)
+		return kvm_x86_ops->pmu_ops->pmc_read_counter(vcpu, idx, data);
+
 	if (is_vmware_backdoor_pmc(idx))
 		return kvm_pmu_rdpmc_vmware(vcpu, idx, data);
 
@@ -337,6 +340,18 @@ void kvm_pmu_reset(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->pmu_ops->reset(vcpu);
 }
 
+void kvm_pmu_sched_out(struct kvm_vcpu *vcpu)
+{
+	if (kvm_x86_ops->pmu_ops->sched_out)
+		kvm_x86_ops->pmu_ops->sched_out(vcpu);
+}
+
+void kvm_pmu_sched_in(struct kvm_vcpu *vcpu)
+{
+	if (kvm_x86_ops->pmu_ops->sched_in)
+		kvm_x86_ops->pmu_ops->sched_in(vcpu);
+}
+
 void kvm_pmu_init(struct kvm_vcpu *vcpu)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index ba8898e..de68ff0 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -33,6 +33,12 @@ struct kvm_pmu_ops {
 	void (*refresh)(struct kvm_vcpu *vcpu);
 	void (*init)(struct kvm_vcpu *vcpu);
 	void (*reset)(struct kvm_vcpu *vcpu);
+	bool (*pmc_is_assigned)(struct kvm_pmc *pmc);
+	void (*pmc_stop_counter)(struct kvm_pmc *pmc);
+	int (*pmc_read_counter)(struct kvm_vcpu *vcpu,
+				unsigned int idx, u64 *data);
+	void (*sched_out)(struct kvm_vcpu *vcpu);
+	void (*sched_in)(struct kvm_vcpu *vcpu);
 };
 
 static inline u64 pmc_bitmask(struct kvm_pmc *pmc)
@@ -54,8 +60,22 @@ static inline u64 pmc_read_counter(struct kvm_pmc *pmc)
 	return counter & pmc_bitmask(pmc);
 }
 
+static inline bool pmc_is_assigned(struct kvm_pmc *pmc)
+{
+	if (kvm_x86_ops->pmu_ops->pmc_is_assigned)
+		return kvm_x86_ops->pmu_ops->pmc_is_assigned(pmc);
+
+	return false;
+}
+
 static inline void pmc_stop_counter(struct kvm_pmc *pmc)
 {
+	if (kvm_x86_ops->pmu_ops->pmc_stop_counter) {
+		if (pmc_is_assigned(pmc))
+			rdmsrl(pmc->perf_event->hw.event_base, pmc->counter);
+		return;
+	}
+
 	if (pmc->perf_event) {
 		pmc->counter = pmc_read_counter(pmc);
 		perf_event_release_kernel(pmc->perf_event);
@@ -117,6 +137,8 @@ static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr)
 void kvm_pmu_reset(struct kvm_vcpu *vcpu);
 void kvm_pmu_init(struct kvm_vcpu *vcpu);
 void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
+void kvm_pmu_sched_out(struct kvm_vcpu *vcpu);
+void kvm_pmu_sched_in(struct kvm_vcpu *vcpu);
 
 bool is_vmware_backdoor_pmc(u32 pmc_idx);
 
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 0b69acc..63e00ea 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -522,6 +522,82 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
 	pmu->global_ovf_ctrl = 0;
 }
 
+static void intel_pmu_sched_out(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	struct kvm_pmc *pmc;
+	int i;
+
+	for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
+		pmc = &pmu->gp_counters[i];
+		intel_pmu_disable_host_counter(pmc);
+		intel_pmu_save_guest_pmc(pmu, pmc->idx);
+	}
+
+	for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
+		pmc = &pmu->fixed_counters[i];
+		intel_pmu_disable_host_counter(pmc);
+		intel_pmu_save_guest_pmc(pmu, pmc->idx);
+	}
+}
+
+static void intel_pmu_sched_in(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	struct kvm_pmc *pmc;
+	struct hw_perf_event *hwc;
+	u64 host_ctrl, test, disabled_ctrl_val = 0;
+	int i;
+
+	for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
+		pmc = &pmu->gp_counters[i];
+
+		if (pmc->perf_event && pmc->hw_life_count == 0)
+			intel_pmc_stop_counter(pmc);
+
+		if (!intel_pmc_is_assigned(pmc))
+			continue;
+
+		intel_pmu_restore_guest_pmc(pmu, pmc->idx);
+
+		hwc = &pmc->perf_event->hw;
+		if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
+			u64 mask = 0xfULL <<
+				((hwc->idx - INTEL_PMC_IDX_FIXED) * 4);
+			disabled_ctrl_val &= ~mask;
+			rdmsrl(hwc->config_base, host_ctrl);
+			if (disabled_ctrl_val == host_ctrl)
+				pmc->hw_life_count--;
+		} else if (!(pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE)) {
+			pmc->hw_life_count--;
+		}
+	}
+
+	for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
+		pmc = &pmu->fixed_counters[i];
+
+		if (pmc->perf_event && pmc->hw_life_count == 0)
+			intel_pmc_stop_counter(pmc);
+
+		if (!intel_pmc_is_assigned(pmc))
+			continue;
+
+		intel_pmu_restore_guest_pmc(pmu, pmc->idx);
+
+		hwc = &pmc->perf_event->hw;
+		if (hwc->idx < INTEL_PMC_IDX_FIXED) {
+			rdmsrl(hwc->config_base, test);
+			if (!(test & ARCH_PERFMON_EVENTSEL_ENABLE))
+				pmc->hw_life_count--;
+		} else {
+			u8 ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl,
+					pmc->idx - INTEL_PMC_IDX_FIXED);
+			if (ctrl == 0)
+				pmc->hw_life_count--;
+		}
+	}
+}
+
 struct kvm_pmu_ops intel_pmu_ops = {
 	.find_arch_event = intel_find_arch_event,
 	.find_fixed_event = intel_find_fixed_event,
@@ -535,4 +611,9 @@ struct kvm_pmu_ops intel_pmu_ops = {
 	.refresh = intel_pmu_refresh,
 	.init = intel_pmu_init,
 	.reset = intel_pmu_reset,
+	.pmc_is_assigned = intel_pmc_is_assigned,
+	.pmc_stop_counter = intel_pmc_stop_counter,
+	.pmc_read_counter = intel_pmc_read_counter,
+	.sched_out = intel_pmu_sched_out,
+	.sched_in = intel_pmu_sched_in,
 };
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 65e4559..f9c715b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9100,9 +9100,15 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 	static_key_slow_dec(&kvm_no_apic_vcpu);
 }
 
+void kvm_arch_sched_out(struct kvm_vcpu *vcpu)
+{
+	kvm_pmu_sched_out(vcpu);
+}
+
 void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
 {
 	vcpu->arch.l1tf_flush_l1d = true;
+	kvm_pmu_sched_in(vcpu);
 	kvm_x86_ops->sched_in(vcpu, cpu);
 }
 
-- 
1.8.3.1