From: Like Xu <like.xu@xxxxxxxxxxxxxxx>

The number of Arch LBR entries available is determined by the value in
host MSR_ARCH_LBR_DEPTH.DEPTH. The supported LBR depth values are
enumerated in CPUID.(EAX=01CH, ECX=0):EAX[7:0]. For each bit "n" set in
this field, the MSR_ARCH_LBR_DEPTH.DEPTH value of "8*(n+1)" is
supported.

On a guest write to MSR_ARCH_LBR_DEPTH, all LBR entries are reset to 0.
KVM writes the guest-requested value to the native ARCH_LBR_DEPTH MSR
(this is safe because the two values will be the same) when the Arch LBR
records MSRs are passed through to the guest.

Signed-off-by: Like Xu <like.xu@xxxxxxxxxxxxxxx>
Signed-off-by: Yang Weijiang <weijiang.yang@xxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h |  3 ++
 arch/x86/kvm/vmx/pmu_intel.c    | 63 ++++++++++++++++++++++++++++++++-
 2 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 974cbfb1eefe..a93b77b10933 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -515,6 +515,9 @@ struct kvm_pmu {
	 * redundant check before cleanup if guest don't use vPMU at all.
	 */
	u8 event_count;
+
+	/* Guest arch lbr depth supported by KVM. */
+	u64 kvm_arch_lbr_depth;
 };
 
 struct kvm_pmu_ops;
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 9efc1a6b8693..cddc23689846 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -211,7 +211,7 @@ static bool intel_pmu_is_valid_lbr_msr(struct kvm_vcpu *vcpu, u32 index)
 static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
 {
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
-	int ret;
+	int ret = 0;
 
	switch (msr) {
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
@@ -220,6 +220,10 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		ret = pmu->version > 1;
		break;
+	case MSR_ARCH_LBR_DEPTH:
+		if (kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR))
+			ret = guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR);
+		break;
	default:
		ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) ||
			get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) ||
@@ -348,10 +352,26 @@ static bool intel_pmu_handle_lbr_msrs_access(struct kvm_vcpu *vcpu,
	return true;
 }
 
+/*
+ * Check if the requested depth value is the same as that of the host.
+ * When guest/host depth are different, the handling would be tricky,
+ * so now only max depth is supported for both host and guest.
+ */
+static bool arch_lbr_depth_is_valid(struct kvm_vcpu *vcpu, u64 depth)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+
+	if (!kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR))
+		return false;
+
+	return (depth == pmu->kvm_arch_lbr_depth);
+}
+
 static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
+	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
	u32 msr = msr_info->index;
 
	switch (msr) {
@@ -367,6 +387,9 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
		msr_info->data = pmu->global_ovf_ctrl;
		return 0;
+	case MSR_ARCH_LBR_DEPTH:
+		msr_info->data = lbr_desc->records.nr;
+		return 0;
	default:
		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
@@ -393,8 +416,10 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
+	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
	u32 msr = msr_info->index;
	u64 data = msr_info->data;
+	u64 lbr_ctl;
 
	switch (msr) {
	case MSR_CORE_PERF_FIXED_CTR_CTRL:
@@ -427,6 +452,30 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
			return 0;
		}
		break;
+	case MSR_ARCH_LBR_DEPTH:
+		if (!arch_lbr_depth_is_valid(vcpu, data))
+			return 1;
+		lbr_desc->records.nr = data;
+		if (msr_info->host_initiated)
+			return 0;
+
+		rdmsrl(MSR_ARCH_LBR_CTL, lbr_ctl);
+		/*
+		 * If the host is using Arch LBR on this CPU at this point,
+		 * then reject the guest's request.
+		 */
+		if (lbr_ctl & ARCH_LBR_CTL_LBREN && !lbr_desc->event)
+			return 1;
+		/* If the event has been sched out, we don't write the MSR. */
+		if (lbr_desc->event &&
+		    lbr_desc->event->state < PERF_EVENT_STATE_ACTIVE)
+			return 1;
+		/*
+		 * Otherwise, the guest is programming the depth MSR or resetting
+		 * LBR records via the short-cut, so allow the write to the MSR.
+		 */
+		wrmsrl(MSR_ARCH_LBR_DEPTH, lbr_desc->records.nr);
+		return 0;
	default:
		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
@@ -540,6 +589,18 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 
	if (lbr_desc->records.nr)
		bitmap_set(pmu->all_valid_pmc_idx, INTEL_PMC_IDX_FIXED_VLBR, 1);
+
+	if (!kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR))
+		return;
+
+	entry = kvm_find_cpuid_entry(vcpu, 28, 0);
+	if (entry) {
+		/*
+		 * The depth mask in CPUID has been forced by KVM to the host
+		 * supported value when user space sets the guest CPUID.
+		 */
+		pmu->kvm_arch_lbr_depth = fls(entry->eax & 0xff) * 8;
+	}
 }
 
 static void intel_pmu_init(struct kvm_vcpu *vcpu)
-- 
2.25.1
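
[Editor's note, not part of the patch: a minimal standalone C sketch of the
depth derivation the patch relies on. CPUID.(EAX=01CH, ECX=0):EAX[7:0] is a
bitmask where bit "n" set means a depth of 8*(n+1) is supported, so the
highest set bit yields the maximum depth -- the same quantity computed by
fls(entry->eax & 0xff) * 8 in intel_pmu_refresh(). The helper name and the
example bitmask below are illustrative only.]

#include <stdio.h>

/* Return the maximum Arch LBR depth enumerated by CPUID.0x1C EAX[7:0]. */
static unsigned int max_arch_lbr_depth(unsigned int cpuid_1c_eax)
{
	unsigned int mask = cpuid_1c_eax & 0xff;
	unsigned int depth = 0;

	/* Walk the eight depth-enumeration bits; bit n => depth 8*(n+1). */
	for (unsigned int n = 0; n < 8; n++) {
		if (mask & (1u << n))
			depth = 8 * (n + 1);
	}
	return depth;
}

int main(void)
{
	/* Example: bits 0..3 set => depths 8, 16, 24 and 32 supported. */
	printf("max depth = %u\n", max_arch_lbr_depth(0x0f)); /* prints 32 */
	return 0;
}

Because KVM only advertises and accepts the host's maximum depth, a guest
write of any other value fails arch_lbr_depth_is_valid() and the subsequent
wrmsrl() of the guest value to the native MSR is safe.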