The Intel PEBS-via-PT feature introduces a new output mechanism that
directs PEBS records into the Intel PT buffer. After each PEBS record
is generated, the counter value is automatically reloaded from a new
set of "reload value" MSRs (based at MSR_RELOAD_FIXED_CTRx and
MSR_RELOAD_PMCx) rather than from the counter reload values stored in
the DS management area. If the guest's perf_capabilities advertises
this capability, PEBS records are directed to the PT buffer whenever
the relevant bit in pebs_enable is set.

Co-developed-by: Luwei Kang <luwei.kang@xxxxxxxxx>
Signed-off-by: Luwei Kang <luwei.kang@xxxxxxxxx>
Signed-off-by: Like Xu <like.xu@xxxxxxxxxxxxxxx>
---
 arch/x86/events/perf_event.h     |  5 -----
 arch/x86/include/asm/kvm_host.h  |  1 +
 arch/x86/include/asm/msr-index.h |  6 ++++++
 arch/x86/kvm/pmu.h               |  8 ++++++++
 arch/x86/kvm/vmx/pmu_intel.c     | 18 ++++++++++++++++++
 5 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 685a1a4e9438..4171f1328732 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -115,11 +115,6 @@ struct amd_nb {
 };
 
 #define PEBS_COUNTER_MASK	((1ULL << MAX_PEBS_EVENTS) - 1)
-#define PEBS_PMI_AFTER_EACH_RECORD BIT_ULL(60)
-#define PEBS_OUTPUT_OFFSET	61
-#define PEBS_OUTPUT_MASK	(3ull << PEBS_OUTPUT_OFFSET)
-#define PEBS_OUTPUT_PT		(1ull << PEBS_OUTPUT_OFFSET)
-#define PEBS_VIA_PT_MASK	(PEBS_OUTPUT_PT | PEBS_PMI_AFTER_EACH_RECORD)
 
 /*
  * Flags PEBS can handle without an PMI.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 15bff609fd57..29d2d8027014 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -443,6 +443,7 @@ struct kvm_pmc {
 	u8 idx;
 	u64 counter;
 	u64 eventsel;
+	u64 reload_counter;
 	struct perf_event *perf_event;
 	struct kvm_vcpu *vcpu;
 	/*
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 1ab3f280f3a9..364c40ecd963 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -187,12 +187,18 @@
 #define MSR_IA32_PERF_CAPABILITIES	0x00000345
 #define PERF_CAP_METRICS_IDX		15
 #define PERF_CAP_PT_IDX			16
+#define PEBS_PMI_AFTER_EACH_RECORD	BIT_ULL(60)
+#define PEBS_OUTPUT_OFFSET		61
+#define PEBS_OUTPUT_MASK		(3ull << PEBS_OUTPUT_OFFSET)
+#define PEBS_OUTPUT_PT			(1ull << PEBS_OUTPUT_OFFSET)
+#define PEBS_VIA_PT_MASK		(PEBS_OUTPUT_PT | PEBS_PMI_AFTER_EACH_RECORD)
 
 #define MSR_PEBS_LD_LAT_THRESHOLD	0x000003f6
 #define PERF_CAP_PEBS_TRAP		BIT_ULL(6)
 #define PERF_CAP_ARCH_REG		BIT_ULL(7)
 #define PERF_CAP_PEBS_FORMAT		0xf00
 #define PERF_CAP_PEBS_BASELINE		BIT_ULL(14)
+#define PERF_CAP_PEBS_OUTPUT_PT		BIT_ULL(16)
 #define PERF_CAP_PEBS_MASK	(PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
 				 PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE)
 
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 6720881b8370..f9895a7a59bc 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -115,6 +115,10 @@ static inline bool kvm_valid_perf_global_ctrl(struct kvm_pmu *pmu,
 static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr,
 					 u32 base)
 {
+	if ((base == MSR_RELOAD_PMC0 || base == MSR_RELOAD_FIXED_CTR0) &&
+	    !(pmu_to_vcpu(pmu)->arch.perf_capabilities & PERF_CAP_PEBS_OUTPUT_PT))
+		return NULL;
+
 	if (msr >= base && msr < base + pmu->nr_arch_gp_counters) {
 		u32 index = array_index_nospec(msr - base,
 					       pmu->nr_arch_gp_counters);
@@ -129,6 +133,10 @@ static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr,
 static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr,
 					    u32 base)
 {
+	if ((base == MSR_RELOAD_PMC0 || base == MSR_RELOAD_FIXED_CTR0) &&
+	    !(pmu_to_vcpu(pmu)->arch.perf_capabilities & PERF_CAP_PEBS_OUTPUT_PT))
+		return NULL;
+
 	if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) {
 		u32 index = array_index_nospec(msr - base,
 					       pmu->nr_arch_fixed_counters);
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index c10cb3008bf1..e5c12c958cdb 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -226,6 +226,8 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
 			get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) ||
 			get_fixed_pmc(pmu, msr, MSR_CORE_PERF_FIXED_CTR0) ||
 			get_fw_gp_pmc(pmu, msr) ||
+			get_gp_pmc(pmu, msr, MSR_RELOAD_PMC0) ||
+			get_fixed_pmc(pmu, msr, MSR_RELOAD_FIXED_CTR0) ||
 			intel_pmu_is_valid_lbr_msr(vcpu, msr);
 		break;
 	}
@@ -241,6 +243,8 @@ static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
 	pmc = get_fixed_pmc(pmu, msr, MSR_CORE_PERF_FIXED_CTR0);
 	pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0);
 	pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0);
+	pmc = pmc ? pmc : get_gp_pmc(pmu, msr, MSR_RELOAD_PMC0);
+	pmc = pmc ? pmc : get_fixed_pmc(pmu, msr, MSR_RELOAD_FIXED_CTR0);
 
 	return pmc;
 }
@@ -394,6 +398,10 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		} else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
 			msr_info->data = pmc->eventsel;
 			return 0;
+		} else if ((pmc = get_gp_pmc(pmu, msr, MSR_RELOAD_PMC0)) ||
+			   (pmc = get_fixed_pmc(pmu, msr, MSR_RELOAD_FIXED_CTR0))) {
+			msr_info->data = pmc->reload_counter;
+			return 0;
 		} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, true))
 			return 0;
 	}
@@ -488,6 +496,12 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 				reprogram_gp_counter(pmc, data);
 				return 0;
 			}
+		} else if ((pmc = get_gp_pmc(pmu, msr, MSR_RELOAD_PMC0)) ||
+			   (pmc = get_fixed_pmc(pmu, msr, MSR_RELOAD_FIXED_CTR0))) {
+			if (!(data & ~pmc_bitmask(pmc))) {
+				pmc->reload_counter = data;
+				return 0;
+			}
 		} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false))
 			return 0;
 	}
@@ -595,6 +609,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 			pmu->pebs_enable_mask =
 				~((1ull << pmu->nr_arch_gp_counters) - 1);
 		}
+		if (vcpu->arch.perf_capabilities & PERF_CAP_PEBS_OUTPUT_PT)
+			pmu->pebs_enable_mask &= ~PEBS_VIA_PT_MASK;
 	} else {
 		vcpu->arch.ia32_misc_enable_msr |= MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL;
 		vcpu->arch.perf_capabilities &= ~PERF_CAP_PEBS_MASK;
@@ -612,6 +628,7 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
 		pmu->gp_counters[i].vcpu = vcpu;
 		pmu->gp_counters[i].idx = i;
 		pmu->gp_counters[i].current_config = 0;
+		pmu->gp_counters[i].reload_counter = 0;
 	}
 
 	for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
@@ -619,6 +636,7 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
 		pmu->fixed_counters[i].vcpu = vcpu;
 		pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED;
 		pmu->fixed_counters[i].current_config = 0;
+		pmu->fixed_counters[i].reload_counter = 0;
 	}
 
 	vcpu->arch.perf_capabilities = vmx_get_perf_capabilities();
-- 
2.31.1
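
A minimal guest-side sketch (kernel-style C, not part of the patch) of
how the pieces above fit together, following the commit message. The
function name and sample period are hypothetical, the 48-bit counter
width is an assumption, and event selection plus IA32_PERF_GLOBAL_CTRL
programming are omitted for brevity:

	#include <linux/bits.h>
	#include <linux/errno.h>
	#include <asm/msr.h>
	#include <asm/msr-index.h>

	static int guest_enable_pebs_via_pt(void)
	{
		u64 caps, pebs_enable;

		/* KVM exposes the feature via IA32_PERF_CAPABILITIES bit 16. */
		rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps);
		if (!(caps & PERF_CAP_PEBS_OUTPUT_PT))
			return -EOPNOTSUPP;

		/*
		 * After each PEBS record, counter 0 reloads from
		 * MSR_RELOAD_PMC0 instead of from the DS management area.
		 * The value must fit the counter width (48 bits assumed
		 * here); otherwise KVM's pmc_bitmask() check rejects the
		 * write.
		 */
		wrmsrl(MSR_RELOAD_PMC0, BIT_ULL(48) - 100000);

		/* Route PEBS records for counter 0 into the PT buffer. */
		rdmsrl(MSR_IA32_PEBS_ENABLE, pebs_enable);
		pebs_enable |= BIT_ULL(0) | PEBS_OUTPUT_PT |
			       PEBS_PMI_AFTER_EACH_RECORD;
		wrmsrl(MSR_IA32_PEBS_ENABLE, pebs_enable);

		return 0;
	}

With this patch applied, KVM accepts the PEBS_VIA_PT_MASK bits in
pebs_enable and lets the guest read back MSR_RELOAD_PMC0 (backed by
pmc->reload_counter) only when PERF_CAP_PEBS_OUTPUT_PT is set in the
guest's perf_capabilities; otherwise the reload MSRs resolve to no PMC
and the accesses are rejected.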