From: Like Xu <likexu@xxxxxxxxxxx>

Batch reprogramming PMU counters by setting KVM_REQ_PMU and thus
deferring reprogramming to kvm_pmu_handle_event() to avoid reprogramming
a counter multiple times during a single VM-Exit.

Deferring programming will also allow KVM to fix a bug where immediately
reprogramming a counter can result in sleeping (taking a mutex) while
interrupts are disabled in the VM-Exit fastpath.

Introduce kvm_pmu_request_counter_reprogam() to make it obvious that KVM
is _requesting_ a reprogram and not actually doing the reprogram.

Opportunistically refine related comments to avoid misunderstandings.

Signed-off-by: Like Xu <likexu@xxxxxxxxxxx>
Link: https://lore.kernel.org/r/20220831085328.45489-5-likexu@xxxxxxxxxxx
Signed-off-by: Sean Christopherson <seanjc@xxxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/pmu.c              | 17 ++++++++++++-----
 arch/x86/kvm/pmu.h              |  6 +++++-
 arch/x86/kvm/svm/pmu.c          |  2 +-
 arch/x86/kvm/vmx/pmu_intel.c    |  6 +++---
 5 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 462f041ede9f..12dcfc9330e7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -493,6 +493,7 @@ struct kvm_pmc {
 	struct perf_event *perf_event;
 	struct kvm_vcpu *vcpu;
 	/*
+	 * only for creating or reusing perf_event,
 	 * eventsel value for general purpose counters,
 	 * ctrl value for fixed counters.
 	 */
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 4cd99320019b..d8330e6064ab 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -101,7 +101,11 @@ static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
 	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
 	bool skip_pmi = false;
 
-	/* Ignore counters that have been reprogrammed already. */
+	/*
+	 * Ignore overflow events for counters that are scheduled to be
+	 * reprogrammed, e.g. if a PMI for the previous event races with KVM's
+	 * handling of a related guest WRMSR.
+	 */
 	if (test_and_set_bit(pmc->idx, pmu->reprogram_pmi))
 		return;
 
@@ -292,7 +296,7 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc)
 	return allow_event;
 }
 
-void reprogram_counter(struct kvm_pmc *pmc)
+static void reprogram_counter(struct kvm_pmc *pmc)
 {
 	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
 	u64 eventsel = pmc->eventsel;
@@ -345,7 +349,6 @@ void reprogram_counter(struct kvm_pmc *pmc)
 reprogram_complete:
 	clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
 }
-EXPORT_SYMBOL_GPL(reprogram_counter);
 
 void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
 {
@@ -355,10 +358,11 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
 	for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) {
 		struct kvm_pmc *pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit);
 
-		if (unlikely(!pmc || !pmc->perf_event)) {
+		if (unlikely(!pmc)) {
 			clear_bit(bit, pmu->reprogram_pmi);
 			continue;
 		}
+
 		reprogram_counter(pmc);
 	}
 
@@ -552,12 +556,15 @@ static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
 static inline bool cpl_is_matched(struct kvm_pmc *pmc)
 {
 	bool select_os, select_user;
-	u64 config = pmc->current_config;
+	u64 config;
 
 	if (pmc_is_gp(pmc)) {
+		config = pmc->eventsel;
 		select_os = config & ARCH_PERFMON_EVENTSEL_OS;
 		select_user = config & ARCH_PERFMON_EVENTSEL_USR;
 	} else {
+		config = fixed_ctrl_field(pmc_to_pmu(pmc)->fixed_ctr_ctrl,
+					  pmc->idx - INTEL_PMC_IDX_FIXED);
 		select_os = config & 0x1;
 		select_user = config & 0x2;
 	}
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 5cc5721f260b..85ff3c0588ba 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -183,7 +183,11 @@ static inline void kvm_init_pmu_capability(void)
 					     KVM_PMC_MAX_FIXED);
 }
 
-void reprogram_counter(struct kvm_pmc *pmc);
+static inline void kvm_pmu_request_counter_reprogam(struct kvm_pmc *pmc)
+{
+	set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
+	kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
+}
 
 void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
 void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index b68956299fa8..041aa898e1bc 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -159,7 +159,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		data &= ~pmu->reserved_bits;
 		if (data != pmc->eventsel) {
 			pmc->eventsel = data;
-			reprogram_counter(pmc);
+			kvm_pmu_request_counter_reprogam(pmc);
 		}
 		return 0;
 	}
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 25b70a85bef5..e38518afc265 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -52,7 +52,7 @@ static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
 		pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i);
 
 		__set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use);
-		reprogram_counter(pmc);
+		kvm_pmu_request_counter_reprogam(pmc);
 	}
 }
 
@@ -76,7 +76,7 @@ static void reprogram_counters(struct kvm_pmu *pmu, u64 diff)
 	for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) {
 		pmc = intel_pmc_idx_to_pmc(pmu, bit);
 		if (pmc)
-			reprogram_counter(pmc);
+			kvm_pmu_request_counter_reprogam(pmc);
 	}
 }
 
@@ -477,7 +477,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 				reserved_bits ^= HSW_IN_TX_CHECKPOINTED;
 			if (!(data & reserved_bits)) {
 				pmc->eventsel = data;
-				reprogram_counter(pmc);
+				kvm_pmu_request_counter_reprogam(pmc);
 				return 0;
 			}
 		} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false))
-- 
2.37.3.998.g577e59143f-goog
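
For readers unfamiliar with the request/batch pattern the patch relies on, below is a
minimal, self-contained userspace sketch of the idea. It is not kernel code, and the
names (pmu_model, request_counter_reprogram, handle_pmu_request, etc.) are invented
for illustration: repeated WRMSR-style updates only set a pending bit and raise a
request flag, and the deferred handler then performs the actual reprogram at most
once per counter.

/*
 * Simplified model of defer-and-batch counter reprogramming; all names are
 * hypothetical and only stand in for the kernel's reprogram_pmi bitmap,
 * KVM_REQ_PMU, and kvm_pmu_handle_event().
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_COUNTERS 8

struct pmu_model {
	unsigned long pending_reprogram;        /* one bit per counter */
	bool req_pmu;                           /* stands in for KVM_REQ_PMU */
	unsigned long long eventsel[NR_COUNTERS];
	int reprogram_calls;                    /* how many real reprograms ran */
};

/* Cheap: record that the counter needs attention and raise the request. */
static void request_counter_reprogram(struct pmu_model *pmu, int idx)
{
	pmu->pending_reprogram |= 1UL << idx;
	pmu->req_pmu = true;
}

/* Expensive part, executed at most once per counter per batch. */
static void reprogram_counter(struct pmu_model *pmu, int idx)
{
	pmu->reprogram_calls++;
	pmu->pending_reprogram &= ~(1UL << idx);
	printf("reprogrammed counter %d with eventsel 0x%llx\n",
	       idx, pmu->eventsel[idx]);
}

/* Batched handler, analogous to kvm_pmu_handle_event() before guest reentry. */
static void handle_pmu_request(struct pmu_model *pmu)
{
	if (!pmu->req_pmu)
		return;
	pmu->req_pmu = false;

	for (int idx = 0; idx < NR_COUNTERS; idx++) {
		if (pmu->pending_reprogram & (1UL << idx))
			reprogram_counter(pmu, idx);
	}
}

int main(void)
{
	struct pmu_model pmu = { 0 };

	/* Several "guest WRMSRs" touching the same counter in one exit... */
	pmu.eventsel[3] = 0xc0;
	request_counter_reprogram(&pmu, 3);
	pmu.eventsel[3] = 0x2e;
	request_counter_reprogram(&pmu, 3);
	request_counter_reprogram(&pmu, 5);

	/* ...collapse into a single reprogram per counter here. */
	handle_pmu_request(&pmu);

	printf("total reprogram calls: %d\n", pmu.reprogram_calls);
	return 0;
}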