If HW supports the GLOBAL_CTRL MSR, enabling and disabling PMCs is moved
into __precise_loop(). Thus, the instructions and branches events can be
verified against a precise count instead of a rough range.

Note that some intermittent failures are seen on AMD processors using
PerfMonV2 due to variance in counts. This probably has to do with the way
instructions leading to a VM-Entry or VM-Exit are accounted when counting
retired instructions and branches.

https://lore.kernel.org/all/6d512a14-ace1-41a3-801e-0beb41425734@xxxxxxx/

So only enable this precise check for Intel processors.

Signed-off-by: Dapeng Mi <dapeng1.mi@xxxxxxxxxxxxxxx>
---
 x86/pmu.c | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/x86/pmu.c b/x86/pmu.c
index 270f11b9..13c7c45d 100644
--- a/x86/pmu.c
+++ b/x86/pmu.c
@@ -19,6 +19,11 @@
 #define EXPECTED_INSTR 17
 #define EXPECTED_BRNCH 5
 
+
+/* Enable GLOBAL_CTRL + disable GLOBAL_CTRL instructions */
+#define EXTRA_INSTRNS  (3 + 3)
+#define LOOP_INSTRNS   (N * 10 + EXTRA_INSTRNS)
+#define LOOP_BRANCHES  (N)
 #define LOOP_ASM(_wrmsr)						\
 	_wrmsr "\n\t"							\
 	"mov %%ecx, %%edi; mov %%ebx, %%ecx;\n\t"			\
@@ -123,6 +128,30 @@ static inline void loop(u64 cntrs)
 		__precise_loop(cntrs);
 }
 
+static void adjust_events_range(struct pmu_event *gp_events,
+				int instruction_idx, int branch_idx)
+{
+	/*
+	 * If HW supports GLOBAL_CTRL MSR, enabling and disabling PMCs are
+	 * moved in __precise_loop(). Thus, instructions and branches events
+	 * can be verified against a precise count instead of a rough range.
+	 *
+	 * We see some intermittent failures on AMD processors using PerfMonV2
+	 * due to variance in counts. This probably has to do with the way
+	 * instructions leading to a VM-Entry or VM-Exit are accounted when
+	 * counting retired instructions and branches. Thus only enable the
+	 * precise validation for Intel processors.
+	 */
+	if (pmu.is_intel && this_cpu_has_perf_global_ctrl()) {
+		/* instructions event */
+		gp_events[instruction_idx].min = LOOP_INSTRNS;
+		gp_events[instruction_idx].max = LOOP_INSTRNS;
+		/* branches event */
+		gp_events[branch_idx].min = LOOP_BRANCHES;
+		gp_events[branch_idx].max = LOOP_BRANCHES;
+	}
+}
+
 volatile uint64_t irq_received;
 
 static void cnt_overflow(isr_regs_t *regs)
@@ -832,6 +861,9 @@ static void check_invalid_rdpmc_gp(void)
 
 int main(int ac, char **av)
 {
+	int instruction_idx;
+	int branch_idx;
+
 	setup_vm();
 	handle_irq(PMI_VECTOR, cnt_overflow);
 	buf = malloc(N*64);
@@ -845,13 +877,18 @@ int main(int ac, char **av)
 		}
 		gp_events = (struct pmu_event *)intel_gp_events;
 		gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]);
+		instruction_idx = INTEL_INSTRUCTIONS_IDX;
+		branch_idx = INTEL_BRANCHES_IDX;
 		report_prefix_push("Intel");
 		set_ref_cycle_expectations();
 	} else {
 		gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]);
 		gp_events = (struct pmu_event *)amd_gp_events;
+		instruction_idx = AMD_INSTRUCTIONS_IDX;
+		branch_idx = AMD_BRANCHES_IDX;
 		report_prefix_push("AMD");
 	}
+	adjust_events_range(gp_events, instruction_idx, branch_idx);
 
 	printf("PMU version: %d\n", pmu.version);
 	printf("GP counters: %d\n", pmu.nr_gp_counters);
-- 
2.40.1
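
Not part of the patch: a minimal, standalone sketch of the expected-count
arithmetic that the new LOOP_INSTRNS/LOOP_BRANCHES macros encode. The helper
names, INSTRNS_PER_ITER, and the value of n below are illustrative
assumptions; only the "N * 10 + (3 + 3)" instruction count and the
one-branch-per-iteration relation come from the patch itself.

/*
 * Illustrative sketch only -- mirrors LOOP_INSTRNS/LOOP_BRANCHES from the
 * patch. Assumes each loop iteration retires 10 instructions (one of them
 * the loop branch) and that enabling plus disabling GLOBAL_CTRL around the
 * measured loop retires 3 + 3 extra instructions, as EXTRA_INSTRNS encodes.
 */
#include <stdio.h>

#define INSTRNS_PER_ITER 10	/* per-iteration instructions (from the patch) */
#define EXTRA_INSTRNS (3 + 3)	/* enable + disable GLOBAL_CTRL sequences */

static unsigned long expected_instructions(unsigned long n)
{
	return n * INSTRNS_PER_ITER + EXTRA_INSTRNS;
}

static unsigned long expected_branches(unsigned long n)
{
	/* one taken loop branch per iteration */
	return n;
}

int main(void)
{
	unsigned long n = 1000000;	/* stand-in for N in x86/pmu.c */

	/*
	 * With GLOBAL_CTRL toggled inside the measured region, both events
	 * have an exact expectation, so the test can set min == max.
	 */
	printf("expected instructions: %lu\n", expected_instructions(n));
	printf("expected branches:     %lu\n", expected_branches(n));
	return 0;
}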