On Mon, Jul 14, 2014 at 12:25:56PM -0700, kan.liang@xxxxxxxxx wrote: Close enough. > +#define EVENT_EXTRA_REG(e, ms, m, vm, i, a) { \ > + .event = (e), \ > + .msr = (ms), \ > + .config_mask = (m), \ > + .valid_mask = (vm), \ > + .idx = EXTRA_REG_##i, \ > + .extra_msr_access = (a), \ > } > > #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \ > - EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx) > + EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx, true) > > #define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx) \ > EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \ > - ARCH_PERFMON_EVENTSEL_UMASK, vm, idx) > + ARCH_PERFMON_EVENTSEL_UMASK, vm, idx, true) > > #define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \ > INTEL_UEVENT_EXTRA_REG(c, \ > @@ -318,7 +320,7 @@ struct extra_reg { > 0xffff, \ > LDLAT) > > -#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0) > +#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0, false) > diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h > index 90236f0..d860e26 100644 > --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h > +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h > @@ -366,11 +366,11 @@ > NHMEX_M_PMON_CTL_FLAG_MODE) > #define MBOX_INC_SEL_EXTAR_REG(c, r) \ > EVENT_EXTRA_REG(MBOX_INC_SEL(c), NHMEX_M0_MSR_PMU_##r, \ > - MBOX_INC_SEL_MASK, (u64)-1, NHMEX_M_##r) > + MBOX_INC_SEL_MASK, (u64)-1, NHMEX_M_##r, true) > #define MBOX_SET_FLAG_SEL_EXTRA_REG(c, r) \ > EVENT_EXTRA_REG(MBOX_SET_FLAG_SEL(c), NHMEX_M0_MSR_PMU_##r, \ > MBOX_SET_FLAG_SEL_MASK, \ > - (u64)-1, NHMEX_M_##r) > + (u64)-1, NHMEX_M_##r, true) > > /* NHM-EX Rbox */ > #define NHMEX_R_MSR_GLOBAL_CTL 0xe00 Since nobody ever treats EVENT_EXTRA_END as an actual event, the value of .extra_msr_access is irrelevant, this leaves the only 'possible' value 'true' and we can delete all those changes. Which, combined with a few whitespace cleanups, gives the below patch. --- Subject: x86, perf: Protect LBR and extra_regs against KVM lying From: Kan Liang <kan.liang@xxxxxxxxx> Date: Mon, 14 Jul 2014 12:25:56 -0700 With -cpu host, KVM reports LBR and extra_regs support, if the host has support. When the guest perf driver tries to access LBR or extra_regs MSR, it #GPs all MSR accesses,since KVM doesn't handle LBR and extra_regs support. So check the related MSRs access right once at initialization time to avoid the error access at runtime. For reproducing the issue, please build the kernel with CONFIG_KVM_INTEL = y (for host kernel). And CONFIG_PARAVIRT = n and CONFIG_KVM_GUEST = n (for guest kernel). Start the guest with -cpu host. Run perf record with --branch-any or --branch-filter in guest to trigger LBR Run perf stat offcore events (E.g. LLC-loads/LLC-load-misses ...) in guest to trigger offcore_rsp #GP Cc: andi@xxxxxxxxxxxxxx Signed-off-by: Kan Liang <kan.liang@xxxxxxxxx> Signed-off-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Link: http://lkml.kernel.org/r/1405365957-20202-1-git-send-email-kan.liang@xxxxxxxxx --- arch/x86/kernel/cpu/perf_event.c | 3 + arch/x86/kernel/cpu/perf_event.h | 12 +++--- arch/x86/kernel/cpu/perf_event_intel.c | 66 ++++++++++++++++++++++++++++++++- 3 files changed, 75 insertions(+), 6 deletions(-) --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -118,6 +118,9 @@ static int x86_pmu_extra_regs(u64 config continue; if (event->attr.config1 & ~er->valid_mask) return -EINVAL; + /* Check if the extra msrs can be safely accessed*/ + if (!er->extra_msr_access) + return -ENXIO; reg->idx = er->idx; reg->config = event->attr.config1; --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -295,14 +295,16 @@ struct extra_reg { u64 config_mask; u64 valid_mask; int idx; /* per_xxx->regs[] reg index */ + bool extra_msr_access; }; #define EVENT_EXTRA_REG(e, ms, m, vm, i) { \ - .event = (e), \ - .msr = (ms), \ - .config_mask = (m), \ - .valid_mask = (vm), \ - .idx = EXTRA_REG_##i, \ + .event = (e), \ + .msr = (ms), \ + .config_mask = (m), \ + .valid_mask = (vm), \ + .idx = EXTRA_REG_##i, \ + .extra_msr_access = true, \ } #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \ --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2182,6 +2182,41 @@ static void intel_snb_check_microcode(vo } } +/* + * Under certain circumstances, access certain MSR may cause #GP. + * The function tests if the input MSR can be safely accessed. + */ +static bool check_msr(unsigned long msr, u64 mask) +{ + u64 val_old, val_new, val_tmp; + + /* + * Read the current value, change it and read it back to see if it + * matches, this is needed to detect certain hardware emulators + * (qemu/kvm) that don't trap on the MSR access and always return 0s. + */ + if (rdmsrl_safe(msr, &val_old)) + return false; + + /* + * Only change the bits which can be updated by wrmsrl. + */ + val_tmp = val_old ^ mask; + if (wrmsrl_safe(msr, val_tmp) || + rdmsrl_safe(msr, &val_new)) + return false; + + if (val_new != val_tmp) + return false; + + /* Here it's sure that the MSR can be safely accessed. + * Restore the old value and return. + */ + wrmsrl(msr, val_old); + + return true; +} + static __init void intel_sandybridge_quirk(void) { x86_pmu.check_microcode = intel_snb_check_microcode; @@ -2271,7 +2306,8 @@ __init int intel_pmu_init(void) union cpuid10_ebx ebx; struct event_constraint *c; unsigned int unused; - int version; + struct extra_reg *er; + int version, i; if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { switch (boot_cpu_data.x86) { @@ -2577,6 +2613,34 @@ __init int intel_pmu_init(void) } } + /* + * Access LBR MSR may cause #GP under certain circumstances. + * E.g. KVM doesn't support LBR MSR + * Check all LBT MSR here. + * Disable LBR access if any LBR MSRs can not be accessed. + */ + if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL)) + x86_pmu.lbr_nr = 0; + for (i = 0; i < x86_pmu.lbr_nr; i++) { + if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) && + check_msr(x86_pmu.lbr_to + i, 0xffffUL))) + x86_pmu.lbr_nr = 0; + } + + /* + * Access extra MSR may cause #GP under certain circumstances. + * E.g. KVM doesn't support offcore event + * Check all extra_regs here. + */ + if (x86_pmu.extra_regs) { + for (er = x86_pmu.extra_regs; er->msr; er++) { + er->extra_msr_access = check_msr(er->msr, 0x1ffUL); + /* Disable LBR select mapping */ + if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access) + x86_pmu.lbr_sel_map = NULL; + } + } + /* Support full width counters using alternative MSR range */ if (x86_pmu.intel_cap.full_width_write) { x86_pmu.max_period = x86_pmu.cntval_mask;
Attachment:
pgpmX9rhCMcFh.pgp
Description: PGP signature