On Sun, Nov 03, 2024, Maxim Levitsky wrote: > On Mon, 2024-10-28 at 08:55 -0700, Sean Christopherson wrote: > > On Fri, Oct 18, 2024, Maxim Levitsky wrote: > > > Our CI found another issue, this time with vmx_pmu_caps_test. > > > > > > On 'Intel(R) Xeon(R) Gold 6328HL CPU' I see that all LBR msrs (from/to and > > > TOS), are always read only - even when LBR is disabled - once I disable the > > > feature in DEBUG_CTL, all LBR msrs reset to 0, and you can't change their > > > value manually. Freeze LBRS on PMI seems not to affect this behavior. ... > When DEBUG_CTL.LBR=1, the LBRs do work, I see all the registers update, > although TOS does seem to be stuck at one value, but it does change > sometimes, and it's non zero. > > The FROM/TO do show healthy amount of updates > > Note that I read all msrs using 'rdmsr' userspace tool. I'm pretty sure debugging via 'rdmsr', i.e. /dev/msr, isn't going to work. I assume perf is clobbering LBR MSRs on context switch, but I haven't tracked that down to confirm (the code I see on inspecition is gated on at least one perf event using LBRs). My guess is that there's a software bug somewhere in the perf/KVM exchange. I confirmed that using 'rdmsr' and 'wrmsr' "loses" values, but that hacking KVM to read/write all LBRs during initialization works with LBRs disabled. --- diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f72835e85b6d..c68a5a79c668 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7907,6 +7907,8 @@ static __init u64 vmx_get_perf_capabilities(void) { u64 perf_cap = PMU_CAP_FW_WRITES; u64 host_perf_cap = 0; + u64 debugctl, val; + int i; if (!enable_pmu) return 0; @@ -7954,6 +7956,39 @@ static __init u64 vmx_get_perf_capabilities(void) perf_cap &= ~PERF_CAP_PEBS_BASELINE; } + if (!vmx_lbr_caps.nr) { + pr_warn("Uh, what? No LBRs...\n"); + goto out; + } + + rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + if (debugctl & DEBUGCTLMSR_LBR) { + pr_warn("Huh, LBRs enabled at KVM load? debugctl = %llx\n", debugctl); + wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl & ~DEBUGCTLMSR_LBR); + } + + for (i = 0; i < vmx_lbr_caps.nr; i++) { + wrmsrl(vmx_lbr_caps.from + i, 0xbeef0000 + i); + wrmsrl(vmx_lbr_caps.to + i, 0xcafe0000 + i); + } + + for (i = 0; i < vmx_lbr_caps.nr; i++) { + rdmsrl(vmx_lbr_caps.from + i, val); + if (val != 0xbeef0000 + i) + pr_warn("MSR 0x%x Expected %x, got %llx\n", + vmx_lbr_caps.from + i, 0xbeef0000 + i, val); + rdmsrl(vmx_lbr_caps.to + i, val); + if (val != 0xcafe0000 + i) + pr_warn("MSR 0x%x Expected %x, got %llx\n", + vmx_lbr_caps.from + i, 0xcafe0000 + i, val); + } + + pr_warn("Done validating %u from/to LBRs\n", vmx_lbr_caps.nr); + + if (debugctl & DEBUGCTLMSR_LBR) + wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + +out: return perf_cap; } -- And given that perf explicitly disables LBRs (see __intel_pmu_lbr_disable()) before reading LBR MSRs (see intel_pmu_lbr_read()) when taking a snaphot, and AFAIK no one has complained, I would be very surprised if this is hardware doing something odd. --- static noinline int __intel_pmu_snapshot_branch_stack(struct perf_branch_entry *entries, unsigned int cnt, unsigned long flags) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); intel_pmu_lbr_read(); cnt = min_t(unsigned int, cnt, x86_pmu.lbr_nr); memcpy(entries, cpuc->lbr_entries, sizeof(struct perf_branch_entry) * cnt); intel_pmu_enable_all(0); local_irq_restore(flags); return cnt; } static int intel_pmu_snapshot_branch_stack(struct perf_branch_entry *entries, unsigned int cnt) { unsigned long flags; /* must not have branches... */ local_irq_save(flags); __intel_pmu_disable_all(false); /* we don't care about BTS */ __intel_pmu_lbr_disable(); /* ... until here */ return __intel_pmu_snapshot_branch_stack(entries, cnt, flags); } ---