Re: [PATCH 08/13] perf/x86/amd: Add framework to save/restore host IBS state

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 9/4/23 04:53, Manali Shukla wrote:
Since IBS registers falls under swap type C [1], only the guest state

s/falls/fall/

is saved and restored automatically by the hardware. Host state needs
to be saved and restored manually by the hypervisor. Note that, saving
and restoring of host IBS state happens only when IBS is active on
host to avoid unnecessary rdmsrs/wrmsrs.

Also, hypervisor needs to disable host IBS before VMRUN and re-enable
it after VMEXIT [2]. However, disabling and enabling of IBS leads to
subtle races between software and hardware since IBS_*_CTL registers
contain both control and result bits in the same MSR.

Consider the following scenario, hypervisor reads IBS control MSR and
finds enable=1 (control bit) and valid=0 (result bit). While kernel is
clearing enable bit in its local copy, IBS hardware sets valid bit to
1 in the MSR. Software, who is unaware of the change done by IBS
hardware, overwrites IBS MSR with enable=0 and valid=0. Note that,
this situation occurs while NMIs are disabled. So CPU will receive IBS
NMI only after STGI. However, the IBS driver won't handle NMI because
of the valid bit being 0. Since the real source of NMI was IBS, nobody
else will also handle it which will result in the unknown NMIs.

Handle the above mentioned race by keeping track of different actions
performed by KVM on IBS:

   WINDOW_START: After CLGI and before VMRUN. KVM informs IBS driver
                 about its intention to enable IBS for the guest. Thus
		IBS should be disabled on host and IBS host register
		state should be saved.
   WINDOW_STOPPING: After VMEXIT and before STGI. KVM informs IBS driver
                 that it's done using IBS inside the guest and thus host
		IBS state should be restored followed by re-enabling
		IBS for host.
   WINDOW_STOPPED: After STGI. CPU will receive any pending NMI if it
                 was raised between CLGI and STGI. NMI will be marked
		as handled by IBS driver if WINDOW_STOPPED action is
                 _not performed, valid bit is _not_ set and a valid
                 IBS event exists. However, IBS sample won't be generated.

[1]: https://bugzilla.kernel.org/attachment.cgi?id=304653
      AMD64 Architecture Programmer’s Manual, Vol 2, Appendix B Layout
      of VMCB, Table B-3 Swap Types.

[2]: https://bugzilla.kernel.org/attachment.cgi?id=304653
      AMD64 Architecture Programmer’s Manual, Vol 2, Section 15.38
      Instruction-Based Sampling Virtualization.

Signed-off-by: Manali Shukla <manali.shukla@xxxxxxx>
---
  arch/x86/events/amd/Makefile      |   2 +-
  arch/x86/events/amd/ibs.c         |  23 +++++++
  arch/x86/events/amd/vibs.c        | 101 ++++++++++++++++++++++++++++++
  arch/x86/include/asm/perf_event.h |  27 ++++++++
  4 files changed, 152 insertions(+), 1 deletion(-)
  create mode 100644 arch/x86/events/amd/vibs.c

diff --git a/arch/x86/events/amd/Makefile b/arch/x86/events/amd/Makefile
index 527d947eb76b..13c2980db9a7 100644
--- a/arch/x86/events/amd/Makefile
+++ b/arch/x86/events/amd/Makefile
@@ -2,7 +2,7 @@
  obj-$(CONFIG_CPU_SUP_AMD)		+= core.o lbr.o
  obj-$(CONFIG_PERF_EVENTS_AMD_BRS)	+= brs.o
  obj-$(CONFIG_PERF_EVENTS_AMD_POWER)	+= power.o
-obj-$(CONFIG_X86_LOCAL_APIC)		+= ibs.o
+obj-$(CONFIG_X86_LOCAL_APIC)		+= ibs.o vibs.o
  obj-$(CONFIG_PERF_EVENTS_AMD_UNCORE)	+= amd-uncore.o
  amd-uncore-objs				:= uncore.o
  ifdef CONFIG_AMD_IOMMU
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 6911c5399d02..359464f2910d 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -1039,6 +1039,16 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
  		 */
  		if (test_and_clear_bit(IBS_STOPPED, pcpu->state))
  			return 1;
+		/*
+		 * Catch NMIs generated in an active IBS window: Incoming NMIs
+		 * from an active IBS window might have the VALID bit cleared
+		 * when it is supposed to be set due to a race. The reason for
+		 * the race is ENABLE and VALID bits for MSR_AMD64_IBSFETCHCTL
+		 * and MSR_AMD64_IBSOPCTL being in their same respective MSRs.
+		 * Ignore all such NMIs and treat them as handled.
+		 */
+		if (amd_vibs_ignore_nmi())
+			return 1;
return 0;
  	}
@@ -1542,3 +1552,16 @@ static __init int amd_ibs_init(void)
/* Since we need the pci subsystem to init ibs we can't do this earlier: */
  device_initcall(amd_ibs_init);
+
+static inline bool get_ibs_state(struct perf_ibs *perf_ibs)
+{
+	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
+
+	return test_bit(IBS_STARTED, pcpu->state);
+}
+
+bool is_amd_ibs_started(void)
+{
+	return get_ibs_state(&perf_ibs_fetch) || get_ibs_state(&perf_ibs_op);
+}
+EXPORT_SYMBOL_GPL(is_amd_ibs_started);

If this is only used by the IBS support, it shouldn't have an EXPORT_SYMBOL_GPL.

diff --git a/arch/x86/events/amd/vibs.c b/arch/x86/events/amd/vibs.c
new file mode 100644
index 000000000000..273a60f1cb7f
--- /dev/null
+++ b/arch/x86/events/amd/vibs.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  Virtualized Performance events - AMD VIBS
+ *
+ *  Copyright (C) 2023 Advanced Micro Devices, Inc., Manali Shukla
+ *
+ *  For licencing details see kernel-base/COPYING
+ */
+
+#include <linux/perf_event.h>
+
+DEFINE_PER_CPU(bool, vibs_window_active);
+
+static bool amd_disable_ibs_fetch(u64 *ibs_fetch_ctl)
+{
+	*ibs_fetch_ctl = __rdmsr(MSR_AMD64_IBSFETCHCTL);
+	if (!(*ibs_fetch_ctl & IBS_FETCH_ENABLE))
+		return false;
+
+	native_wrmsrl(MSR_AMD64_IBSFETCHCTL, *ibs_fetch_ctl & ~IBS_FETCH_ENABLE);
+
+	return true;
+}
+
+static u64 amd_disable_ibs_op(u64 *ibs_op_ctl)
+{
+	*ibs_op_ctl = __rdmsr(MSR_AMD64_IBSOPCTL);
+	if (!(*ibs_op_ctl & IBS_OP_ENABLE))
+		return false;
+
+	native_wrmsrl(MSR_AMD64_IBSOPCTL, *ibs_op_ctl & ~IBS_OP_ENABLE);
+
+	return true;
+}
+
+static void amd_restore_ibs_fetch(u64 ibs_fetch_ctl)
+{
+	native_wrmsrl(MSR_AMD64_IBSFETCHCTL, ibs_fetch_ctl);
+}
+
+static void amd_restore_ibs_op(u64 ibs_op_ctl)
+{
+	native_wrmsrl(MSR_AMD64_IBSOPCTL, ibs_op_ctl);
+}
+
+bool amd_vibs_ignore_nmi(void)
+{
+	return __this_cpu_read(vibs_window_active);
+}
+EXPORT_SYMBOL_GPL(amd_vibs_ignore_nmi);

If this is only used by the IBS support, it shouldn't have an EXPORT_SYMBOL_GPL.

+
+bool amd_vibs_window(enum amd_vibs_window_state state, u64 *f_ctl,
+		     u64 *o_ctl)
+{
+	bool f_active, o_active;
+
+	switch (state) {
+	case WINDOW_START:
+		if (!f_ctl || !o_ctl)
+			return false;
+
+		if (!is_amd_ibs_started())
+			return false;
+
+		f_active = amd_disable_ibs_fetch(f_ctl);
+		o_active = amd_disable_ibs_op(o_ctl);
+		__this_cpu_write(vibs_window_active, (f_active || o_active));
+		break;
+
+	case WINDOW_STOPPING:
+		if (!f_ctl || !o_ctl)
+			return false;
+
+		if (__this_cpu_read(vibs_window_active))

Shouldn't this be if (!__this_cpu_read(vibs_window_active)) ?

+			return false;
+
+		if (*f_ctl & IBS_FETCH_ENABLE)
+			amd_restore_ibs_fetch(*f_ctl);
+		if (*o_ctl & IBS_OP_ENABLE)
+			amd_restore_ibs_op(*o_ctl);

Nit, these if checks could go into the restore functions to make this look a bit cleaner, but that's just from my point of view.

+
+		break;
+
+	case WINDOW_STOPPED:
+		/*
+		 * This state is executed right after STGI (which is executed
+		 * after VMEXIT).  By this time, host IBS states are already
+		 * restored in WINDOW_STOPPING state, so f_ctl and o_ctl will
+		 * be passed as NULL for this state.
+		 */
+		if (__this_cpu_read(vibs_window_active))
+			__this_cpu_write(vibs_window_active, false);

Any reason not to just issue __this_cpu_write(vibs_window_active, false) without the if check?

+		break;
+
+	default:
+		return false;
+	}
+
+	return __this_cpu_read(vibs_window_active);

You could just issue a return true or return false (depending on the case) instead of having the break statements, e.g.:

For WINDOW_START replace break with return (f_active || o_active)
For WINDOW_STOPPING replace break with return true
For WINDOW_STOPPED replace break with return false

+}
+EXPORT_SYMBOL_GPL(amd_vibs_window);
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 85a9fd5a3ec3..b87c235e0e1e 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -486,6 +486,12 @@ struct pebs_xmm {
  #define IBS_OP_MAX_CNT_EXT_MASK	(0x7FULL<<20)	/* separate upper 7 bits */
  #define IBS_RIP_INVALID		(1ULL<<38)
+enum amd_vibs_window_state {
+	WINDOW_START = 0,
+	WINDOW_STOPPING,
+	WINDOW_STOPPED,
+};
+
  #ifdef CONFIG_X86_LOCAL_APIC
  extern u32 get_ibs_caps(void);
  extern int forward_event_to_ibs(struct perf_event *event);
@@ -584,6 +590,27 @@ static inline void intel_pt_handle_vmx(int on)
  }
  #endif
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_CPU_SUP_AMD)
+extern bool amd_vibs_window(enum amd_vibs_window_state state, u64 *vibs_fetch_ctl,
+			    u64 *vibs_op_ctl);
+extern bool is_amd_ibs_started(void);
+extern bool amd_vibs_ignore_nmi(void);

And if the two above functions aren't EXPORT_SYMBOL_GPL, then they could go somewhere else instead of here.

Thanks,
Tom

+#else
+static inline bool amd_vibs_window(enum amd_vibs_window_state state, u64 *vibs_fetch_ctl,
+				  u64 *vibs_op_ctl)
+{
+	return false;
+}
+static inline bool is_amd_ibs_started(void)
+{
+	return false;
+}
+static inline bool amd_vibs_ignore_nmi(void)
+{
+	return false;
+}
+#endif
+
  #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
   extern void amd_pmu_enable_virt(void);
   extern void amd_pmu_disable_virt(void);



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux