Re: [PATCH v7 04/15] KVM: vmx/pmu: Emulate MSR_ARCH_LBR_DEPTH for guest Arch LBR

Like Xu <like.xu.linux@xxxxxxxxx> · Mon, 9 Aug 2021 21:16:47 +0800

On 6/8/2021 3:42 pm, Yang Weijiang wrote:
From: Like Xu <like.xu@xxxxxxxxxxxxxxx>

...


The number of Arch LBR entries available is determined by the value
in host MSR_ARCH_LBR_DEPTH.DEPTH. The supported LBR depth values are
enumerated in CPUID.(EAX=01CH, ECX=0):EAX[7:0]. For each bit "n" set
in this field, the MSR_ARCH_LBR_DEPTH.DEPTH value of "8*(n+1)" is
supported.

On a guest write to MSR_ARCH_LBR_DEPTH, all LBR entries are reset to 0.
KVM writes guest requested value to the native ARCH_LBR_DEPTH MSR
(this is safe because the two values will be the same) when the Arch LBR
records MSRs are pass-through to the guest.

Signed-off-by: Like Xu <like.xu@xxxxxxxxxxxxxxx>
Signed-off-by: Yang Weijiang <weijiang.yang@xxxxxxxxx>
---
  arch/x86/kvm/vmx/pmu_intel.c | 35 ++++++++++++++++++++++++++++++++++-
  1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 9efc1a6b8693..a4ef5bbce186 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -211,7 +211,7 @@ static bool intel_pmu_is_valid_lbr_msr(struct kvm_vcpu *vcpu, u32 index)
  static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
  {
  	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
-	int ret;
+	int ret = 0;
  
  	switch (msr) {
  	case MSR_CORE_PERF_FIXED_CTR_CTRL:
@@ -220,6 +220,10 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
  	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
  		ret = pmu->version > 1;
  		break;
+	case MSR_ARCH_LBR_DEPTH:
+		if (kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR))
+			ret = guest_cpuid_has(vcpu, X86_FEATURE_ARCH_LBR);
+		break;
  	default:
  		ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) ||
  			get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) ||
@@ -348,10 +352,28 @@ static bool intel_pmu_handle_lbr_msrs_access(struct kvm_vcpu *vcpu,
  	return true;
  }
  
+/*
+ * Check if the requested depth value the same as that of host.
+ * When guest/host depth are different, the handling would be tricky,
+ * so now only max depth is supported for both host and guest.
+ */
+static bool arch_lbr_depth_is_valid(struct kvm_vcpu *vcpu, u64 depth)
+{
+	unsigned int eax, ebx, ecx, edx;
+
+	if (!kvm_cpu_cap_has(X86_FEATURE_ARCH_LBR))
+		return false;
+
+	cpuid_count(0x1c, 0, &eax, &ebx, &ecx, &edx);

I really don't understand why the sanity check of the
guest lbr depth needs to read the host's cpuid entry and it's pretty slow.

KVM has reported the maximum host LBR depth as the only supported value.

+
+	return (depth == fls(eax & 0xff) * 8);
+}
+
  static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
  {
  	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
  	struct kvm_pmc *pmc;
+	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
  	u32 msr = msr_info->index;
  
  	switch (msr) {
@@ -367,6 +389,9 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
  	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
  		msr_info->data = pmu->global_ovf_ctrl;
  		return 0;
+	case MSR_ARCH_LBR_DEPTH:
+		msr_info->data = lbr_desc->records.nr;
+		return 0;
  	default:
  		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
  		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
@@ -393,6 +418,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
  {
  	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
  	struct kvm_pmc *pmc;
+	struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
  	u32 msr = msr_info->index;
  	u64 data = msr_info->data;
  
@@ -427,6 +453,13 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
  			return 0;
  		}
  		break;
+	case MSR_ARCH_LBR_DEPTH:
+		if (!arch_lbr_depth_is_valid(vcpu, data))
+			return 1;
+		lbr_desc->records.nr = data;
+		if (!msr_info->host_initiated)
+			wrmsrl(MSR_ARCH_LBR_DEPTH, lbr_desc->records.nr);

Resetting the host msr here is dangerous,
what if the guest LBR event doesn't exist or isn't scheduled on?

+		return 0;
  	default:
  		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
  		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {