From: Like Xu <likexu@xxxxxxxxxxx> The processor supports the Branch Trace Store facility (BTS) if it has a DS buffer and the MISC_ENABLE_BTS_UNAVAIL (RO) bit is cleared. The processor can support the CPL-qualified branch trace mechanism (DSCPL) if CPUID.01H:ECX[bit 4] = 1. To support guest BTS, we need to expose three IA32_DEBUGCTL bits to the guest: The TR bit makes the processor send the branch record out on the system bus as a branch trace message (BTM) when it detects a taken branch, interrupt, or exception. The BTS bit makes the processor log BTMs to a memory-resident BTS buffer that is part of the DS save area. The BTINT bit makes the processor generate an interrupt when the BTS buffer is full. A simple perf test case could be: perf record --per-thread -e intel_bts// ./workload and a valid sample looks like: branches: 401243 cmp_end+0x0 (./workload) => ffffffffb6e01410 asm_exc_nmi+0x0 ([kernel.kallsyms]) Signed-off-by: Like Xu <likexu@xxxxxxxxxxx> --- arch/x86/events/intel/bts.c | 2 ++ arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/pmu.h | 3 +++ arch/x86/kvm/vmx/capabilities.h | 7 +++++++ arch/x86/kvm/vmx/vmx.c | 32 ++++++++++++++++++++++++++++---- 5 files changed, 41 insertions(+), 4 deletions(-) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index ffdcde5b97b1..32a7bfe24deb 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -463,6 +463,8 @@ int intel_bts_interrupt(void) */ if (ds && (ds->bts_index >= ds->bts_interrupt_threshold)) handled = 1; + else if (perf_guest_state() && perf_handle_guest_intr(GUEST_INTEL_BTS)) + return 1; /* * this is wrapped in intel_bts_enable_local/intel_bts_disable_local, diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 166a77a61f2d..3b0116340399 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1663,6 +1663,7 @@ struct kvm_x86_nested_ops { enum { GUEST_INTEL_PT = 0, + GUEST_INTEL_BTS, GUEST_INVALID }; diff --git 
a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 889d064d5ddd..bd3eb5339376 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -11,6 +11,9 @@ #define MSR_IA32_MISC_ENABLE_PMU_RO_MASK (MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL | \ MSR_IA32_MISC_ENABLE_BTS_UNAVAIL) +#define DEBUGCTLMSR_BTS_MASK (DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT) +#define DEBUGCTLMSR_DSCPL_MASK (DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR) + /* retrieve the 4 bits for EN and PMI out of IA32_FIXED_CTR_CTRL */ #define fixed_ctrl_field(ctrl_reg, idx) (((ctrl_reg) >> ((idx)*4)) & 0xf) diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 4dc4bbe18821..cd3b97528ab0 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -435,6 +435,13 @@ static inline u64 vmx_supported_debugctl(void) if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT) debugctl |= DEBUGCTLMSR_LBR_MASK; + if (vmx_pebs_supported() && boot_cpu_has(X86_FEATURE_BTS)) { + debugctl |= DEBUGCTLMSR_BTS_MASK; + /* CPL-Qualified Branch Trace Mechanism */ + if (boot_cpu_has(X86_FEATURE_DSCPL)) + debugctl |= DEBUGCTLMSR_DSCPL_MASK; + } + return debugctl; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 3622323d57c2..cd396ca3c001 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2016,6 +2016,13 @@ static u64 vcpu_supported_debugctl(struct kvm_vcpu *vcpu) if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)) debugctl &= ~DEBUGCTLMSR_BUS_LOCK_DETECT; + if (!guest_cpuid_has(vcpu, X86_FEATURE_DS) || + (vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_BTS_UNAVAIL)) { + debugctl &= ~(DEBUGCTLMSR_BTS_MASK | DEBUGCTLMSR_DSCPL_MASK); + } else if (!guest_cpuid_has(vcpu, X86_FEATURE_DSCPL)) { + debugctl &= ~DEBUGCTLMSR_DSCPL_MASK; + } + return debugctl; } @@ -7691,6 +7698,8 @@ static __init void vmx_set_cpu_caps(void) if (vmx_pebs_supported()) { kvm_cpu_cap_check_and_set(X86_FEATURE_DS); kvm_cpu_cap_check_and_set(X86_FEATURE_DTES64); 
+ if (kvm_cpu_cap_has(X86_FEATURE_DS)) + kvm_cpu_cap_check_and_set(X86_FEATURE_DSCPL); } if (!enable_pmu) @@ -8149,6 +8158,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { static unsigned int vmx_handle_guest_intr(unsigned int vector) { struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); + u64 data; /* '0' on failure so that the !PT case can use a RET0 static call. */ if (!vcpu || !kvm_handling_nmi_from_guest(vcpu)) @@ -8157,10 +8167,24 @@ static unsigned int vmx_handle_guest_intr(unsigned int vector) if (vector >= GUEST_INVALID) return 0; - kvm_make_request(KVM_REQ_PMI, vcpu); - __set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT, - (unsigned long *)&vcpu->arch.pmu.global_status); - return 1; + switch (vector) { + case GUEST_INTEL_PT: + __set_bit(MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT, + (unsigned long *)&vcpu->arch.pmu.global_status); + kvm_make_request(KVM_REQ_PMI, vcpu); + return 1; + case GUEST_INTEL_BTS: + data = vmcs_read64(GUEST_IA32_DEBUGCTL); + if ((data & DEBUGCTLMSR_BTS_MASK) == DEBUGCTLMSR_BTS_MASK) { + kvm_make_request(KVM_REQ_PMI, vcpu); + return 1; + } + break; + default: + break; + } + + return 0; } static __init void vmx_setup_user_return_msrs(void) -- 2.37.3