+
typedef struct {
uint32_t ctr;
uint64_t config;
@@ -54,13 +68,43 @@ char *buf;
static struct pmu_event *gp_events;
static unsigned int gp_events_size;
-static inline void loop(void)
+
+static inline void __loop(void)
+{
+ unsigned long tmp, tmp2, tmp3;
+
+ asm volatile(LOOP_ASM
+ : "=c"(tmp), "=r"(tmp2), "=r"(tmp3)
+ : "0"(N), "1"(buf));
+}
+
+/*
+ * Enable and disable the counters inside a single asm blob so that
+ * no other instructions are counted in the window between enabling
+ * the counters and actually executing the LOOP_ASM code.
+ * This lets the instructions and branches events be verified
+ * against precise counts instead of a rough valid count range.
+ */
+static inline void __precise_count_loop(u64 cntrs)
{
unsigned long tmp, tmp2, tmp3;
+ unsigned int global_ctl = pmu.msr_global_ctl;
+ u32 eax = cntrs & (BIT_ULL(32) - 1);
+ u32 edx = cntrs >> 32;
- asm volatile("1: mov (%1), %2; add $64, %1; nop; nop; nop; nop; nop; nop; nop; loop 1b"
- : "=c"(tmp), "=r"(tmp2), "=r"(tmp3): "0"(N), "1"(buf));
+ asm volatile(PRECISE_LOOP_ASM
+ : "=b"(tmp), "=r"(tmp2), "=r"(tmp3)
+ : "a"(eax), "d"(edx), "c"(global_ctl),
+ "0"(N), "1"(buf)
+ : "edi");
+}
+static inline void loop(u64 cntrs)
+{
+ if (!this_cpu_has_perf_global_ctrl())
+ __loop();
+ else
+ __precise_count_loop(cntrs);
}
volatile uint64_t irq_received;
@@ -159,18 +203,17 @@ static void __start_event(pmu_counter_t *evt, uint64_t count)
ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift);
wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl);
}
- global_enable(evt);
apic_write(APIC_LVTPC, PMI_VECTOR);
}
static void start_event(pmu_counter_t *evt)
{
__start_event(evt, 0);
+ global_enable(evt);
}
-static void stop_event(pmu_counter_t *evt)
+static void __stop_event(pmu_counter_t *evt)
{
- global_disable(evt);
if (is_gp(evt)) {
wrmsr(MSR_GP_EVENT_SELECTx(event_to_global_idx(evt)),
evt->config & ~EVNTSEL_EN);
@@ -182,14 +225,24 @@ static void stop_event(pmu_counter_t *evt)
evt->count = rdmsr(evt->ctr);
}
+static void stop_event(pmu_counter_t *evt)
+{
+ global_disable(evt);
+ __stop_event(evt);
+}
+
static noinline void measure_many(pmu_counter_t *evt, int count)
{
int i;
+ u64 cntrs = 0;
+
+ for (i = 0; i < count; i++) {
+ __start_event(&evt[i], 0);
+ cntrs |= BIT_ULL(event_to_global_idx(&evt[i]));
+ }
+ loop(cntrs);
for (i = 0; i < count; i++)
- start_event(&evt[i]);
- loop();
- for (i = 0; i < count; i++)
- stop_event(&evt[i]);
+ __stop_event(&evt[i]);
}
static void measure_one(pmu_counter_t *evt)
@@ -199,9 +252,11 @@ static void measure_one(pmu_counter_t *evt)
static noinline void __measure(pmu_counter_t *evt, uint64_t count)
{
+ u64 cntrs = BIT_ULL(event_to_global_idx(evt));
+
__start_event(evt, count);
- loop();
- stop_event(evt);
+ loop(cntrs);
+ __stop_event(evt);
}
static bool verify_event(uint64_t count, struct pmu_event *e)
@@ -451,7 +506,7 @@ static void check_running_counter_wrmsr(void)
report_prefix_push("running counter wrmsr");
start_event(&evt);
- loop();
+ __loop();
wrmsr(MSR_GP_COUNTERx(0), 0);
stop_event(&evt);
report(evt.count < gp_events[0].min, "cntr");
@@ -468,7 +523,7 @@ static void check_running_counter_wrmsr(void)
wrmsr(MSR_GP_COUNTERx(0), count);
- loop();
+ __loop();
stop_event(&evt);
if (this_cpu_has_perf_global_status()) {
--
2.34.1