The lbr stack is architecturally specific, for example, SKX has 32 lbr stack entries while HSW has 16 entries, so a HSW guest running on a SKX machine may not get accurate perf results. Currently, we forbid the guest lbr enabling when the guest and host see different lbr stack entries. Signed-off-by: Wei Wang <wei.w.wang@xxxxxxxxx> Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx> Cc: Andi Kleen <ak@xxxxxxxxxxxxxxx> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> --- arch/x86/kvm/cpuid.h | 8 ++++ arch/x86/kvm/pmu.c | 8 ++++ arch/x86/kvm/pmu.h | 2 + arch/x86/kvm/pmu_intel.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 3 +- 5 files changed, 136 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 9a327d5..92bdc7d 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -123,6 +123,14 @@ static inline bool guest_cpuid_is_amd(struct kvm_vcpu *vcpu) return best && best->ebx == X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx; } +static inline bool guest_cpuid_is_intel(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 0, 0); + return best && best->ebx == X86EMUL_CPUID_VENDOR_GenuineIntel_ebx; +} + static inline int guest_cpuid_family(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 58ead7d..b438ffa 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -299,6 +299,14 @@ int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data) return 0; } +bool kvm_pmu_lbr_enable(struct kvm_vcpu *vcpu) +{ + if (guest_cpuid_is_intel(vcpu)) + return kvm_x86_ops->pmu_ops->lbr_enable(vcpu); + + return false; +} + void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu) { if (lapic_in_kernel(vcpu)) diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index ba8898e..5f3c7a4 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -28,6 +28,7 @@ struct kvm_pmu_ops { struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, unsigned idx); int (*is_valid_msr_idx)(struct kvm_vcpu *vcpu, unsigned idx); bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr); + bool (*lbr_enable)(struct kvm_vcpu *vcpu); int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 *data); int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info); void (*refresh)(struct kvm_vcpu *vcpu); @@ -106,6 +107,7 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel); void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx); void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx); +bool kvm_pmu_lbr_enable(struct kvm_vcpu *vcpu); void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu); void kvm_pmu_handle_event(struct kvm_vcpu *vcpu); int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c index 5ab4a36..c04cb6d 100644 --- a/arch/x86/kvm/pmu_intel.c +++ b/arch/x86/kvm/pmu_intel.c @@ -15,6 +15,7 @@ #include <linux/kvm_host.h> #include <linux/perf_event.h> #include <asm/perf_event.h> +#include <asm/intel-family.h> #include "x86.h" #include "cpuid.h" #include "lapic.h" @@ -164,6 +165,121 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) return ret; } +static bool intel_pmu_lbr_enable(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + u8 vcpu_model = guest_cpuid_model(vcpu); + unsigned int vcpu_lbr_nr; + + if (x86_perf_get_lbr_stack(&kvm->arch.lbr_stack)) + return false; + + if (guest_cpuid_family(vcpu) != boot_cpu_data.x86) + return false; + + /* + * It could be possible that people have vcpus of old model run on + * physcal cpus of newer model, for example a BDW guest on a SKX + * machine (but not possible to be the other way around). + * The BDW guest may not get accurate results on a SKX machine as it + * only reads 16 entries of the lbr stack while there are 32 entries + * of recordings. So we currently forbid the lbr enabling when the + * vcpu and physical cpu see different lbr stack entries. + */ + switch (vcpu_model) { + case INTEL_FAM6_CORE2_MEROM: + case INTEL_FAM6_CORE2_MEROM_L: + case INTEL_FAM6_CORE2_PENRYN: + case INTEL_FAM6_CORE2_DUNNINGTON: + /* intel_pmu_lbr_init_core() */ + vcpu_lbr_nr = 4; + break; + case INTEL_FAM6_NEHALEM: + case INTEL_FAM6_NEHALEM_EP: + case INTEL_FAM6_NEHALEM_EX: + /* intel_pmu_lbr_init_nhm() */ + vcpu_lbr_nr = 16; + break; + case INTEL_FAM6_ATOM_BONNELL: + case INTEL_FAM6_ATOM_BONNELL_MID: + case INTEL_FAM6_ATOM_SALTWELL: + case INTEL_FAM6_ATOM_SALTWELL_MID: + case INTEL_FAM6_ATOM_SALTWELL_TABLET: + /* intel_pmu_lbr_init_atom() */ + vcpu_lbr_nr = 8; + break; + case INTEL_FAM6_ATOM_SILVERMONT: + case INTEL_FAM6_ATOM_SILVERMONT_X: + case INTEL_FAM6_ATOM_SILVERMONT_MID: + case INTEL_FAM6_ATOM_AIRMONT: + case INTEL_FAM6_ATOM_AIRMONT_MID: + /* intel_pmu_lbr_init_slm() */ + vcpu_lbr_nr = 8; + break; + case INTEL_FAM6_ATOM_GOLDMONT: + case INTEL_FAM6_ATOM_GOLDMONT_X: + /* intel_pmu_lbr_init_skl(); */ + vcpu_lbr_nr = 32; + break; + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: + /* intel_pmu_lbr_init_skl()*/ + vcpu_lbr_nr = 32; + break; + case INTEL_FAM6_WESTMERE: + case INTEL_FAM6_WESTMERE_EP: + case INTEL_FAM6_WESTMERE_EX: + /* intel_pmu_lbr_init_nhm() */ + vcpu_lbr_nr = 16; + break; + case INTEL_FAM6_SANDYBRIDGE: + case INTEL_FAM6_SANDYBRIDGE_X: + /* intel_pmu_lbr_init_snb() */ + vcpu_lbr_nr = 16; + break; + case INTEL_FAM6_IVYBRIDGE: + case INTEL_FAM6_IVYBRIDGE_X: + /* intel_pmu_lbr_init_snb() */ + vcpu_lbr_nr = 16; + break; + case INTEL_FAM6_HASWELL_CORE: + case INTEL_FAM6_HASWELL_X: + case INTEL_FAM6_HASWELL_ULT: + case INTEL_FAM6_HASWELL_GT3E: + /* intel_pmu_lbr_init_hsw() */ + vcpu_lbr_nr = 16; + break; + case INTEL_FAM6_BROADWELL_CORE: + case INTEL_FAM6_BROADWELL_XEON_D: + case INTEL_FAM6_BROADWELL_GT3E: + case INTEL_FAM6_BROADWELL_X: + /* intel_pmu_lbr_init_hsw() */ + vcpu_lbr_nr = 16; + break; + case INTEL_FAM6_XEON_PHI_KNL: + case INTEL_FAM6_XEON_PHI_KNM: + /* intel_pmu_lbr_init_knl() */ + vcpu_lbr_nr = 8; + break; + case INTEL_FAM6_SKYLAKE_MOBILE: + case INTEL_FAM6_SKYLAKE_DESKTOP: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_KABYLAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_DESKTOP: + /* intel_pmu_lbr_init_skl() */ + vcpu_lbr_nr = 32; + break; + default: + vcpu_lbr_nr = 0; + pr_warn("%s: vcpu model not supported %d\n", __func__, + vcpu_model); + } + + if (vcpu_lbr_nr != kvm->arch.lbr_stack.nr) + return false; + + return true; +} + static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); @@ -350,6 +466,7 @@ struct kvm_pmu_ops intel_pmu_ops = { .msr_idx_to_pmc = intel_msr_idx_to_pmc, .is_valid_msr_idx = intel_is_valid_msr_idx, .is_valid_msr = intel_is_valid_msr, + .lbr_enable = intel_pmu_lbr_enable, .get_msr = intel_pmu_get_msr, .set_msr = intel_pmu_set_msr, .refresh = intel_pmu_refresh, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 50efee4..02e29fd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4505,8 +4505,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, break; case KVM_CAP_X86_GUEST_LBR: r = -EINVAL; - if (cap->args[0] && - x86_perf_get_lbr_stack(&kvm->arch.lbr_stack)) { + if (cap->args[0] && !kvm_pmu_lbr_enable(kvm->vcpus[0])) { pr_err("Failed to enable the guest lbr feature\n"); break; } -- 2.7.4