KVM will check if a guest PEBS counter X is cross-mapped to the host
counter Y. In this case, the applicable_counters field in the guest
PEBS records is filled with the real host counter index(es), which is
incorrect. Currently, KVM disables guest PEBS before vm-entry; later
patches will add more emulation in KVM to keep PEBS working as it does
on the host, such as rewriting the applicable_counters field in the
guest PEBS records buffer.

The cross-mapped check should be done right before vm-entry but after
local_irq_disable(), since the perf scheduler may rotate
pmc->perf_event to another host counter or put the event into an error
state via the hrtimer irq.

Signed-off-by: Like Xu <like.xu@xxxxxxxxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/pmu.c              | 25 +++++++++++++++++++++++++
 arch/x86/kvm/pmu.h              |  1 +
 arch/x86/kvm/vmx/vmx.c          |  3 +++
 arch/x86/kvm/x86.c              |  4 ++++
 5 files changed, 35 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 37df29061a4d..bffb384485da 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -455,6 +455,8 @@ struct kvm_pmu {
 	u64 pebs_data_cfg;
 	u64 pebs_data_cfg_mask;
 
+	bool counter_cross_mapped;
+
 	/*
 	 * The gate to release perf_events not marked in
 	 * pmc_in_use only once in a vcpu time slice.
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index f8aa4724d67b..a6c5951a5728 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -549,3 +549,28 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
 	kfree(filter);
 	return r;
 }
+
+/*
+ * The caller needs to ensure that there is no time window for
+ * perf hrtimer irq or any chance to reschedule pmc->perf_event.
+ */
+void kvm_pmu_counter_cross_mapped_check(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	struct kvm_pmc *pmc = NULL;
+	int bit;
+
+	pmu->counter_cross_mapped = false;
+
+	for_each_set_bit(bit, (unsigned long *)&pmu->pebs_enable, X86_PMC_IDX_MAX) {
+		pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, bit);
+
+		if (!pmc || !pmc_speculative_in_use(pmc) || !pmc_is_enabled(pmc))
+			continue;
+
+		if (pmc->perf_event && (pmc->idx != pmc->perf_event->hw.idx)) {
+			pmu->counter_cross_mapped = true;
+			break;
+		}
+	}
+}
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index ee8f15cc4b5e..f5ec94e9a1dc 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -163,6 +163,7 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu);
 void kvm_pmu_cleanup(struct kvm_vcpu *vcpu);
 void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp);
+void kvm_pmu_counter_cross_mapped_check(struct kvm_vcpu *vcpu);
 
 bool is_vmware_backdoor_pmc(u32 pmc_idx);
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 3b62907c8959..302808ec9699 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6556,6 +6556,9 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
 	if (!msrs)
 		return;
 
+	if (pmu->counter_cross_mapped)
+		msrs[1].guest = 0;
+
 	if (nr_msrs > 2 && msrs[1].guest) {
 		msrs[2].guest = pmu->ds_area;
 		if (nr_msrs > 3)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b5963a36bf6b..88a544e6379f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8859,6 +8859,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	 * result in virtual interrupt delivery.
 	 */
 	local_irq_disable();
+
+	if (vcpu_to_pmu(vcpu)->global_ctrl & vcpu_to_pmu(vcpu)->pebs_enable)
+		kvm_pmu_counter_cross_mapped_check(vcpu);
+
 	vcpu->mode = IN_GUEST_MODE;
 
 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
-- 
2.21.3
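
P.S. For reviewers unfamiliar with the cross-mapping problem, here is a
minimal userspace sketch (illustration only, not part of this patch) of
the record fixup that later patches would perform: when guest PEBS
counter X is backed by host counter Y, the applicable_counters bitmap
in each PEBS basic record carries bit Y and would have to be rewritten
to carry bit X before the buffer is exposed to the guest. The struct
below mirrors the adaptive PEBS "basic info" group layout; the remap
table and helper names are hypothetical.

#include <stdint.h>
#include <stdio.h>

/* Mirrors the adaptive PEBS basic info group (for illustration). */
struct pebs_basic_record {
	uint64_t format_size;
	uint64_t ip;
	uint64_t applicable_counters;	/* bitmap of counters for this record */
	uint64_t tsc;
};

/*
 * host_to_guest[Y] = X is a hypothetical mapping that KVM could build
 * from pmc->idx and pmc->perf_event->hw.idx at the time
 * kvm_pmu_counter_cross_mapped_check() runs; -1 means "not guest-owned".
 */
static uint64_t remap_counters(uint64_t host_bits, const int *host_to_guest)
{
	uint64_t guest_bits = 0;
	int y;

	for (y = 0; y < 64; y++)
		if ((host_bits & (1ULL << y)) && host_to_guest[y] >= 0)
			guest_bits |= 1ULL << host_to_guest[y];
	return guest_bits;
}

int main(void)
{
	int host_to_guest[64];
	int i;

	/* Guest PEBS counter 0 is cross-mapped to host counter 3. */
	struct pebs_basic_record rec = { .applicable_counters = 1ULL << 3 };

	for (i = 0; i < 64; i++)
		host_to_guest[i] = -1;
	host_to_guest[3] = 0;

	rec.applicable_counters =
		remap_counters(rec.applicable_counters, host_to_guest);

	/* Prints 0x1: the record now points at guest counter 0. */
	printf("applicable_counters: %#llx\n",
	       (unsigned long long)rec.applicable_counters);
	return 0;
}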