[PATCH V5 1/2] perf ignore LBR and extra_regs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Kan Liang <kan.liang@xxxxxxxxx>

x86, perf: Protect LBR and extra_regs against KVM lying

With -cpu host, KVM reports LBR and extra_regs support, if the host has support.
When the guest perf driver tries to access LBR or extra_regs MSR,
it #GPs all MSR accesses,since KVM doesn't handle LBR and extra_regs support.
So check the related MSRs access right once at initialization time to avoid the error access at runtime.

For reproducing the issue, please build the kernel with CONFIG_KVM_INTEL = y (for host kernel).
And CONFIG_PARAVIRT = n and CONFIG_KVM_GUEST = n (for guest kernel).
Start the guest with -cpu host.
Run perf record with --branch-any or --branch-filter in guest to trigger LBR #GP.
Run perf stat offcore events (E.g. LLC-loads/LLC-load-misses ...) in guest to trigger offcore_rsp #GP

Signed-off-by: Kan Liang <kan.liang@xxxxxxxxx>

V2: Move the check code to initialization time.
V3: Add flag for each extra register.
    Check all LBR MSRs at initialization time.
V4: Remove lbr_msr_access. For LBR msr, simply set lbr_nr to 0 if check_msr failed.
    Disable all extra msrs in creation places if check_msr failed.
V5: Fix check_msr broken
    Don't check any more MSRs after the first fail
    Return error when checking fail to stop creating the event
    Remove the checking code path which never get
---
 arch/x86/kernel/cpu/perf_event.c       |  3 +++
 arch/x86/kernel/cpu/perf_event.h       | 45 ++++++++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/perf_event_intel.c | 38 +++++++++++++++++++++++++++-
 3 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 2bdfbff..a7c5e4b 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -118,6 +118,9 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
 			continue;
 		if (event->attr.config1 & ~er->valid_mask)
 			return -EINVAL;
+		/* Check if the extra msrs can be safely accessed*/
+		if (!x86_pmu.extra_msr_access[er->idx])
+			return -EFAULT;
 
 		reg->idx = er->idx;
 		reg->config = event->attr.config1;
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 3b2f9bd..992c678 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -464,6 +464,12 @@ struct x86_pmu {
 	 */
 	struct extra_reg *extra_regs;
 	unsigned int er_flags;
+	/*
+	 * EXTRA REG MSR can be accessed
+	 * The extra registers are completely unrelated to each other.
+	 * So it needs a flag for each extra register.
+	 */
+	bool		extra_msr_access[EXTRA_REG_MAX];
 
 	/*
 	 * Intel host/guest support (KVM)
@@ -525,6 +531,45 @@ extern u64 __read_mostly hw_cache_extra_regs
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX];
 
+/*
+ * Under certain circumstances, access certain MSR may cause #GP.
+ * The function tests if the input MSR can be safely accessed.
+ */
+static inline bool check_msr(unsigned long msr)
+{
+	u64 val_old, val_new, val_tmp;
+
+	/*
+	 * Read the current value, change it and read it back to see if it
+	 * matches, this is needed to detect certain hardware emulators
+	 * (qemu/kvm) that don't trap on the MSR access and always return 0s.
+	 */
+	if (rdmsrl_safe(msr, &val_old))
+		goto msr_fail;
+	/*
+	 * Only chagne it slightly,
+	 * since the higher bits of some MSRs cannot be updated by wrmsrl.
+	 * E.g. MSR_LBR_TOS
+	 */
+	val_tmp = val_old ^ 0x3UL;
+	if (wrmsrl_safe(msr, val_tmp) ||
+		rdmsrl_safe(msr, &val_new))
+		goto msr_fail;
+
+	if (val_new != val_tmp)
+		goto msr_fail;
+
+	/* Here it's sure that the MSR can be safely accessed.
+	 * Restore the old value and return.
+	 */
+	wrmsrl(msr, val_old);
+
+	return true;
+
+msr_fail:
+	return false;
+}
+
 u64 x86_perf_event_update(struct perf_event *event);
 
 static inline unsigned int x86_pmu_config_addr(int index)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index adb02aa..953b2b2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2262,7 +2262,8 @@ __init int intel_pmu_init(void)
 	union cpuid10_ebx ebx;
 	struct event_constraint *c;
 	unsigned int unused;
-	int version;
+	int version, i;
+	struct extra_reg *er;
 
 	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
 		switch (boot_cpu_data.x86) {
@@ -2565,6 +2566,41 @@ __init int intel_pmu_init(void)
 		}
 	}
 
+	/*
+	 * Access LBR MSR may cause #GP under certain circumstances.
+	 * E.g. KVM doesn't support LBR MSR
+	 * Check all LBT MSR here.
+	 * Disable LBR access if any LBR MSRs can not be accessed.
+	 */
+	if (x86_pmu.lbr_nr) {
+		if (check_msr(x86_pmu.lbr_tos)) {
+			for (i = 0; i < x86_pmu.lbr_nr; i++) {
+				if (!(check_msr(x86_pmu.lbr_from + i) &&
+				      check_msr(x86_pmu.lbr_to + i))) {
+					x86_pmu.lbr_nr = 0;
+					break;
+				}
+			}
+		} else
+			x86_pmu.lbr_nr = 0;
+	}
+	/*
+	 * Access extra MSR may cause #GP under certain circumstances.
+	 * E.g. KVM doesn't support offcore event
+	 * Check all extra_regs here.
+	 */
+	if (x86_pmu.extra_regs) {
+		memset(x86_pmu.extra_msr_access,
+			true, sizeof(bool) * EXTRA_REG_MAX);
+		for (er = x86_pmu.extra_regs; er->msr; er++) {
+			x86_pmu.extra_msr_access[er->idx] =
+				check_msr(er->msr);
+		}
+		/* Disable LBR select mapping */
+		if (!x86_pmu.extra_msr_access[EXTRA_REG_LBR])
+			x86_pmu.lbr_sel_map = NULL;
+	}
+
 	/* Support full width counters using alternative MSR range */
 	if (x86_pmu.intel_cap.full_width_write) {
 		x86_pmu.max_period = x86_pmu.cntval_mask;
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux