From: Kan Liang <kan.liang@xxxxxxxxx>

x86, perf: Protect LBR and extra_regs against KVM lying

With -cpu host, KVM reports LBR and extra_regs support if the host has
support. When the guest perf driver then tries to access the LBR or
extra_regs MSRs, every access #GPs, since KVM doesn't actually handle
LBR and extra_regs. So check once at initialization time whether the
related MSRs can be accessed, to avoid the faulting accesses at runtime.

To reproduce the issue, build the host kernel with CONFIG_KVM_INTEL=y
and the guest kernel with CONFIG_PARAVIRT=n and CONFIG_KVM_GUEST=n,
then start the guest with -cpu host.
Run perf record with --branch-any or --branch-filter in the guest to
trigger the LBR #GP.
Run perf stat on offcore events (e.g. LLC-loads/LLC-load-misses ...) in
the guest to trigger the offcore_rsp #GP.

Signed-off-by: Kan Liang <kan.liang@xxxxxxxxx>

V2: Move the check code to initialization time.
V3: Add a flag for each extra register.
    Check all LBR MSRs at initialization time.
V4: Remove lbr_msr_access. For the LBR MSRs, simply set lbr_nr to 0 if
    check_msr() fails.
    Disable all extra MSRs at their creation places if check_msr() fails.
---
 arch/x86/kernel/cpu/perf_event.c       |  3 +++
 arch/x86/kernel/cpu/perf_event.h       | 21 +++++++++++++++++
 arch/x86/kernel/cpu/perf_event_intel.c | 43 +++++++++++++++++++++++++++++++---
 3 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 2bdfbff..f0e8022 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -118,6 +118,9 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
 			continue;
 		if (event->attr.config1 & ~er->valid_mask)
 			return -EINVAL;
+		/* Check if the extra MSRs can be safely accessed */
+		if (!x86_pmu.extra_msr_access[er->idx])
+			continue;

 		reg->idx = er->idx;
 		reg->config = event->attr.config1;
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 3b2f9bd..85725c5 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -464,6 +464,12 @@ struct x86_pmu {
 	 */
 	struct extra_reg *extra_regs;
 	unsigned int er_flags;
+	/*
+	 * Whether each EXTRA REG MSR can be accessed.
+	 * The extra registers are completely unrelated to each other,
+	 * so a flag is needed for each extra register.
+	 */
+	bool extra_msr_access[EXTRA_REG_MAX];

 	/*
 	 * Intel host/guest support (KVM)
@@ -525,6 +531,21 @@ extern u64 __read_mostly hw_cache_extra_regs
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX];

+/*
+ * Under certain circumstances, accessing certain MSRs may cause a #GP.
+ * This function tests whether the input MSR can be safely accessed.
+ */
+static inline bool check_msr(unsigned long msr)
+{
+	u64 value;
+
+	if (rdmsrl_safe(msr, &value) < 0)
+		return false;
+	if (wrmsrl_safe(msr, value) < 0)
+		return false;
+	return true;
+}
+
 u64 x86_perf_event_update(struct perf_event *event);

 static inline unsigned int x86_pmu_config_addr(int index)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index adb02aa..2be44be 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1471,11 +1471,14 @@ static void intel_fixup_er(struct perf_event *event, int idx)
 {
 	event->hw.extra_reg.idx = idx;

-	if (idx == EXTRA_REG_RSP_0) {
+	/* The extra_reg is not updated if its MSRs cannot be accessed safely */
+	if ((idx == EXTRA_REG_RSP_0) &&
+	    x86_pmu.extra_msr_access[EXTRA_REG_RSP_0]) {
 		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
 		event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_0].event;
 		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
-	} else if (idx == EXTRA_REG_RSP_1) {
+	} else if ((idx == EXTRA_REG_RSP_1) &&
+		   x86_pmu.extra_msr_access[EXTRA_REG_RSP_1]) {
 		event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
 		event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_1].event;
 		event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
@@ -2262,7 +2265,9 @@ __init int intel_pmu_init(void)
 	union cpuid10_ebx ebx;
 	struct event_constraint *c;
 	unsigned int unused;
-	int version;
+	int version, i;
+	struct extra_reg *er;
+	bool access;

 	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
 		switch (boot_cpu_data.x86) {
@@ -2565,6 +2570,38 @@ __init int intel_pmu_init(void)
 		}
 	}

+	/*
+	 * Accessing the LBR MSRs may cause a #GP under certain circumstances,
+	 * e.g. KVM doesn't support the LBR MSRs.
+	 * Check all LBR MSRs here.
+	 * Disable LBR access if any LBR MSR cannot be accessed.
+	 */
+	if (x86_pmu.lbr_nr) {
+		access = check_msr(x86_pmu.lbr_tos);
+		for (i = 0; i < x86_pmu.lbr_nr; i++) {
+			access &= check_msr(x86_pmu.lbr_from + i);
+			access &= check_msr(x86_pmu.lbr_to + i);
+		}
+		if (!access)
+			x86_pmu.lbr_nr = 0;
+	}
+	/*
+	 * Accessing the extra MSRs may cause a #GP under certain circumstances,
+	 * e.g. KVM doesn't support offcore events.
+	 * Check all extra_regs here.
+	 */
+	if (x86_pmu.extra_regs) {
+		memset(x86_pmu.extra_msr_access,
+		       true, sizeof(bool) * EXTRA_REG_MAX);
+		for (er = x86_pmu.extra_regs; er->msr; er++) {
+			x86_pmu.extra_msr_access[er->idx] =
+				check_msr(er->msr);
+		}
+		/* Disable LBR select mapping */
+		if (!x86_pmu.extra_msr_access[EXTRA_REG_LBR])
+			x86_pmu.lbr_sel_map = NULL;
+	}
+
 	/* Support full width counters using alternative MSR range */
 	if (x86_pmu.intel_cap.full_width_write) {
 		x86_pmu.max_period = x86_pmu.cntval_mask;
-- 
1.8.3.1
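
For reference (not part of the patch): a minimal out-of-tree sketch of the same
rdmsrl_safe()/wrmsrl_safe() probe that check_msr() uses, which can be loaded in
a guest to confirm that MSR_OFFCORE_RSP_0 faults instead of being handled. The
module and function names and the messages are made up for illustration only.

#include <linux/module.h>
#include <linux/kernel.h>
#include <asm/msr.h>

static int __init msr_probe_init(void)
{
	u64 value;

	/* The *_safe() accessors return an error instead of letting the #GP kill the context */
	if (rdmsrl_safe(MSR_OFFCORE_RSP_0, &value) ||
	    wrmsrl_safe(MSR_OFFCORE_RSP_0, value))
		pr_info("MSR_OFFCORE_RSP_0 is not accessible (e.g. under KVM)\n");
	else
		pr_info("MSR_OFFCORE_RSP_0 is accessible\n");

	return 0;
}

static void __exit msr_probe_exit(void)
{
}

module_init(msr_probe_init);
module_exit(msr_probe_exit);
MODULE_LICENSE("GPL");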