On 7/3/2024 3:27 PM, Dapeng Mi wrote:
> If HW supports GLOBAL_CTRL MSR, enabling and disabling PMCs are moved in
> __precise_count_loop(). Thus, instructions and branches events can be
> verified against a precise count instead of a rough range.
>
> Signed-off-by: Dapeng Mi <dapeng1.mi@xxxxxxxxxxxxxxx>
> ---
>  x86/pmu.c | 31 +++++++++++++++++++++++++++++++
>  1 file changed, 31 insertions(+)
>
> diff --git a/x86/pmu.c b/x86/pmu.c
> index d005e376..ffb7b4a4 100644
> --- a/x86/pmu.c
> +++ b/x86/pmu.c
> @@ -19,6 +19,11 @@
>  #define EXPECTED_INSTR 17
>  #define EXPECTED_BRNCH 5
>
> +
> +/* Enable GLOBAL_CTRL + disable GLOBAL_CTRL instructions */
> +#define EXTRA_INSTRNS (3 + 3)
> +#define LOOP_INSTRNS (N * 10 + EXTRA_INSTRNS)
> +#define LOOP_BRANCHES (N)
>  #define LOOP_ASM(_wrmsr)						\
>  	_wrmsr "\n\t"							\
>  	"mov %%ecx, %%edi; mov %%ebx, %%ecx;\n\t"			\
> @@ -122,6 +127,24 @@ static inline void loop(u64 cntrs)
>  		__precise_loop(cntrs);
>  }
>
> +static void adjust_events_range(struct pmu_event *gp_events,
> +				int instruction_idx, int branch_idx)
> +{
> +	/*
> +	 * If HW supports GLOBAL_CTRL MSR, enabling and disabling PMCs are
> +	 * moved in __precise_loop(). Thus, instructions and branches events
> +	 * can be verified against a precise count instead of a rough range.
> +	 */
> +	if (this_cpu_has_perf_global_ctrl()) {

This causes intermittent failures on AMD processors using PerfMonV2 due to
run-to-run variance in the measured counts. This is probably related to how
instructions leading to a VM-Entry or VM-Exit are accounted for when
counting retired instructions and branches.

Adding the following change makes all the tests pass again.

diff --git a/x86/pmu.c b/x86/pmu.c
index 0658a1c1..09a34a3f 100644
--- a/x86/pmu.c
+++ b/x86/pmu.c
@@ -222,7 +222,7 @@ static void adjust_events_range(struct pmu_event *gp_events,
 	 * moved in __precise_loop(). Thus, instructions and branches events
 	 * can be verified against a precise count instead of a rough range.
 	 */
-	if (this_cpu_has_perf_global_ctrl()) {
+	if (pmu.is_intel && this_cpu_has_perf_global_ctrl()) {
 		/* instructions event */
 		gp_events[instruction_idx].min = LOOP_INSTRNS;
 		gp_events[instruction_idx].max = LOOP_INSTRNS;

> +		/* instructions event */
> +		gp_events[instruction_idx].min = LOOP_INSTRNS;
> +		gp_events[instruction_idx].max = LOOP_INSTRNS;
> +		/* branches event */
> +		gp_events[branch_idx].min = LOOP_BRANCHES;
> +		gp_events[branch_idx].max = LOOP_BRANCHES;
> +	}
> +}
> +
>  volatile uint64_t irq_received;
>
>  static void cnt_overflow(isr_regs_t *regs)
> @@ -823,6 +846,9 @@ static void check_invalid_rdpmc_gp(void)
>
>  int main(int ac, char **av)
>  {
> +	int instruction_idx;
> +	int branch_idx;
> +
>  	setup_vm();
>  	handle_irq(PMI_VECTOR, cnt_overflow);
>  	buf = malloc(N*64);
> @@ -836,13 +862,18 @@ int main(int ac, char **av)
>  		}
>  		gp_events = (struct pmu_event *)intel_gp_events;
>  		gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]);
> +		instruction_idx = INTEL_INSTRUCTIONS_IDX;
> +		branch_idx = INTEL_BRANCHES_IDX;
>  		report_prefix_push("Intel");
>  		set_ref_cycle_expectations();
>  	} else {
>  		gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]);
>  		gp_events = (struct pmu_event *)amd_gp_events;
> +		instruction_idx = AMD_INSTRUCTIONS_IDX;
> +		branch_idx = AMD_BRANCHES_IDX;
>  		report_prefix_push("AMD");
>  	}
> +	adjust_events_range(gp_events, instruction_idx, branch_idx);
>
>  	printf("PMU version: %d\n", pmu.version);
>  	printf("GP counters: %d\n", pmu.nr_gp_counters);
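
For context on why the exact counts are fragile: once min == max, the
expected-count comparison degenerates into an equality test, so any variance
in how VM-Entry/VM-Exit instructions are attributed shows up as a failure.
A minimal sketch of that comparison, assuming a verify_event()-style range
check as used elsewhere in x86/pmu.c (the helper name and types below are
illustrative, not the actual code):

	/* Returns true if the measured count falls in the expected range. */
	static bool count_in_expected_range(uint64_t count, const struct pmu_event *e)
	{
		/*
		 * With min == max == LOOP_INSTRNS (or LOOP_BRANCHES), this
		 * becomes an exact-match check, which only holds when the
		 * loop's instruction/branch counts are fully deterministic.
		 */
		return count >= e->min && count <= e->max;
	}

Gating the precise-count adjustment on pmu.is_intel keeps the AMD
expectations as a range, which sidesteps that variance rather than trying to
model it.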