[PATCH v4 04/10] KVM/x86: intel_pmu_lbr_enable

Wei Wang <wei.w.wang@xxxxxxxxx> · Wed, 26 Dec 2018 17:25:32 +0800

The lbr stack is architecturally specific, for example, SKX has 32 lbr
stack entries while HSW has 16 entries, so a HSW guest running on a SKX
machine may not get accurate perf results. Currently, we forbid the
guest lbr enabling when the guest and host see different lbr stack
entries.

Signed-off-by: Wei Wang <wei.w.wang@xxxxxxxxx>
Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Cc: Andi Kleen <ak@xxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
---
 arch/x86/kvm/cpuid.h     |   8 ++++
 arch/x86/kvm/pmu.c       |   8 ++++
 arch/x86/kvm/pmu.h       |   2 +
 arch/x86/kvm/pmu_intel.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c       |   3 +-
 5 files changed, 136 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 9a327d5..92bdc7d 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -123,6 +123,14 @@ static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu)
 	return best && best->ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx;
 }
 
+static inline bool guest_cpuid_is_intel(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 0, 0);
+	return best && best->ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx;
+}
+
 static inline int guest_cpuid_family(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 58ead7d..b438ffa 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -299,6 +299,14 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
 	return 0;
 }
 
+bool kvm_pmu_lbr_enable(struct kvm_vcpu *vcpu)
+{
+	if (guest_cpuid_is_intel(vcpu))
+		return kvm_x86_ops->pmu_ops->lbr_enable(vcpu);
+
+	return false;
+}
+
 void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu)
 {
 	if (lapic_in_kernel(vcpu))
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index ba8898e..5f3c7a4 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -28,6 +28,7 @@ struct kvm_pmu_ops {
 	struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, unsigned idx);
 	int (*is_valid_msr_idx)(struct kvm_vcpu *vcpu, unsigned idx);
 	bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr);
+	bool (*lbr_enable)(struct kvm_vcpu *vcpu);
 	int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
 	int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
 	void (*refresh)(struct kvm_vcpu *vcpu);
@@ -106,6 +107,7 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel);
 void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx);
 void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
 
+bool kvm_pmu_lbr_enable(struct kvm_vcpu *vcpu);
 void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
 void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
 int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c
index 5ab4a36..c04cb6d 100644
--- a/arch/x86/kvm/pmu_intel.c
+++ b/arch/x86/kvm/pmu_intel.c
@@ -15,6 +15,7 @@
 #include <linux/kvm_host.h>
 #include <linux/perf_event.h>
 #include <asm/perf_event.h>
+#include <asm/intel-family.h>
 #include "x86.h"
 #include "cpuid.h"
 #include "lapic.h"
@@ -164,6 +165,121 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
 	return ret;
 }
 
+static bool intel_pmu_lbr_enable(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+	u8 vcpu_model = guest_cpuid_model(vcpu);
+	unsigned int vcpu_lbr_nr;
+
+	if (x86_perf_get_lbr_stack(&kvm->arch.lbr_stack))
+		return false;
+
+	if (guest_cpuid_family(vcpu) != boot_cpu_data.x86)
+		return false;
+
+	/*
+	 * It could be possible that people have vcpus of old model run on
+	 * physcal cpus of newer model, for example a BDW guest on a SKX
+	 * machine (but not possible to be the other way around).
+	 * The BDW guest may not get accurate results on a SKX machine as it
+	 * only reads 16 entries of the lbr stack while there are 32 entries
+	 * of recordings. So we currently forbid the lbr enabling when the
+	 * vcpu and physical cpu see different lbr stack entries.
+	 */
+	switch (vcpu_model) {
+	case INTEL_FAM6_CORE2_MEROM:
+	case INTEL_FAM6_CORE2_MEROM_L:
+	case INTEL_FAM6_CORE2_PENRYN:
+	case INTEL_FAM6_CORE2_DUNNINGTON:
+		/* intel_pmu_lbr_init_core() */
+		vcpu_lbr_nr = 4;
+		break;
+	case INTEL_FAM6_NEHALEM:
+	case INTEL_FAM6_NEHALEM_EP:
+	case INTEL_FAM6_NEHALEM_EX:
+		/* intel_pmu_lbr_init_nhm() */
+		vcpu_lbr_nr = 16;
+		break;
+	case INTEL_FAM6_ATOM_BONNELL:
+	case INTEL_FAM6_ATOM_BONNELL_MID:
+	case INTEL_FAM6_ATOM_SALTWELL:
+	case INTEL_FAM6_ATOM_SALTWELL_MID:
+	case INTEL_FAM6_ATOM_SALTWELL_TABLET:
+		/* intel_pmu_lbr_init_atom() */
+		vcpu_lbr_nr = 8;
+		break;
+	case INTEL_FAM6_ATOM_SILVERMONT:
+	case INTEL_FAM6_ATOM_SILVERMONT_X:
+	case INTEL_FAM6_ATOM_SILVERMONT_MID:
+	case INTEL_FAM6_ATOM_AIRMONT:
+	case INTEL_FAM6_ATOM_AIRMONT_MID:
+		/* intel_pmu_lbr_init_slm() */
+		vcpu_lbr_nr = 8;
+		break;
+	case INTEL_FAM6_ATOM_GOLDMONT:
+	case INTEL_FAM6_ATOM_GOLDMONT_X:
+		/* intel_pmu_lbr_init_skl(); */
+		vcpu_lbr_nr = 32;
+		break;
+	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
+		/* intel_pmu_lbr_init_skl()*/
+		vcpu_lbr_nr = 32;
+		break;
+	case INTEL_FAM6_WESTMERE:
+	case INTEL_FAM6_WESTMERE_EP:
+	case INTEL_FAM6_WESTMERE_EX:
+		/* intel_pmu_lbr_init_nhm() */
+		vcpu_lbr_nr = 16;
+		break;
+	case INTEL_FAM6_SANDYBRIDGE:
+	case INTEL_FAM6_SANDYBRIDGE_X:
+		/* intel_pmu_lbr_init_snb() */
+		vcpu_lbr_nr = 16;
+		break;
+	case INTEL_FAM6_IVYBRIDGE:
+	case INTEL_FAM6_IVYBRIDGE_X:
+		/* intel_pmu_lbr_init_snb() */
+		vcpu_lbr_nr = 16;
+		break;
+	case INTEL_FAM6_HASWELL_CORE:
+	case INTEL_FAM6_HASWELL_X:
+	case INTEL_FAM6_HASWELL_ULT:
+	case INTEL_FAM6_HASWELL_GT3E:
+		/* intel_pmu_lbr_init_hsw() */
+		vcpu_lbr_nr = 16;
+		break;
+	case INTEL_FAM6_BROADWELL_CORE:
+	case INTEL_FAM6_BROADWELL_XEON_D:
+	case INTEL_FAM6_BROADWELL_GT3E:
+	case INTEL_FAM6_BROADWELL_X:
+		/* intel_pmu_lbr_init_hsw() */
+		vcpu_lbr_nr = 16;
+		break;
+	case INTEL_FAM6_XEON_PHI_KNL:
+	case INTEL_FAM6_XEON_PHI_KNM:
+		/* intel_pmu_lbr_init_knl() */
+		vcpu_lbr_nr = 8;
+		break;
+	case INTEL_FAM6_SKYLAKE_MOBILE:
+	case INTEL_FAM6_SKYLAKE_DESKTOP:
+	case INTEL_FAM6_SKYLAKE_X:
+	case INTEL_FAM6_KABYLAKE_MOBILE:
+	case INTEL_FAM6_KABYLAKE_DESKTOP:
+		/* intel_pmu_lbr_init_skl() */
+		vcpu_lbr_nr = 32;
+		break;
+	default:
+		vcpu_lbr_nr = 0;
+		pr_warn("%s: vcpu model not supported %d\n", __func__,
+			vcpu_model);
+	}
+
+	if (vcpu_lbr_nr != kvm->arch.lbr_stack.nr)
+		return false;
+
+	return true;
+}
+
 static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -350,6 +466,7 @@ struct kvm_pmu_ops intel_pmu_ops = {
 	.msr_idx_to_pmc = intel_msr_idx_to_pmc,
 	.is_valid_msr_idx = intel_is_valid_msr_idx,
 	.is_valid_msr = intel_is_valid_msr,
+	.lbr_enable = intel_pmu_lbr_enable,
 	.get_msr = intel_pmu_get_msr,
 	.set_msr = intel_pmu_set_msr,
 	.refresh = intel_pmu_refresh,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 50efee4..02e29fd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4505,8 +4505,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		break;
 	case KVM_CAP_X86_GUEST_LBR:
 		r = -EINVAL;
-		if (cap->args[0] &&
-		    x86_perf_get_lbr_stack(&kvm->arch.lbr_stack)) {
+		if (cap->args[0] && !kvm_pmu_lbr_enable(kvm->vcpus[0])) {
 			pr_err("Failed to enable the guest lbr feature\n");
 			break;
 		}
-- 
2.7.4