On Tue Jun 4, 2024 at 8:38 PM AEST, Thomas Huth wrote: > On 04/05/2024 14.28, Nicholas Piggin wrote: > > Add some initial PMU testing. > > > > - PMC5/6 tests > > - PMAE / PMI test > > - BHRB basic tests > > > > Signed-off-by: Nicholas Piggin <npiggin@xxxxxxxxx> > > --- > ... > > diff --git a/lib/powerpc/setup.c b/lib/powerpc/setup.c > > index a4ff678ce..8ff4939e2 100644 > > --- a/lib/powerpc/setup.c > > +++ b/lib/powerpc/setup.c > > @@ -33,6 +33,7 @@ u32 initrd_size; > > u32 cpu_to_hwid[NR_CPUS] = { [0 ... NR_CPUS-1] = (~0U) }; > > int nr_cpus_present; > > uint64_t tb_hz; > > +uint64_t cpu_hz; > > > > struct mem_region mem_regions[NR_MEM_REGIONS]; > > phys_addr_t __physical_start, __physical_end; > > @@ -42,6 +43,7 @@ struct cpu_set_params { > > unsigned icache_bytes; > > unsigned dcache_bytes; > > uint64_t tb_hz; > > + uint64_t cpu_hz; > > }; > > > > static void cpu_set(int fdtnode, u64 regval, void *info) > > @@ -95,6 +97,22 @@ static void cpu_set(int fdtnode, u64 regval, void *info) > > data = (u32 *)prop->data; > > params->tb_hz = fdt32_to_cpu(*data); > > > > + prop = fdt_get_property(dt_fdt(), fdtnode, > > + "ibm,extended-clock-frequency", NULL); > > + if (prop) { > > + data = (u32 *)prop->data; > > + params->cpu_hz = fdt32_to_cpu(*data); > > + params->cpu_hz <<= 32; > > + data = (u32 *)prop->data + 1; > > + params->cpu_hz |= fdt32_to_cpu(*data); > > Why don't you simply cast to (u64 *) and use fdt64_to_cpu() here instead? Hmm... probably because I copied from somewhere. Good idea though. > > ... > > diff --git a/powerpc/pmu.c b/powerpc/pmu.c > > new file mode 100644 > > index 000000000..8b13ee4cd > > --- /dev/null > > +++ b/powerpc/pmu.c > > @@ -0,0 +1,403 @@ > ... > > +static void test_pmc5_with_fault(void) > > +{ > > + unsigned long pmc5_1, pmc5_2; > > + > > + handle_exception(0x700, &illegal_handler, NULL); > > + handle_exception(0xe40, &illegal_handler, NULL); > > + > > + reset_mmcr0(); > > + mtspr(SPR_PMC5, 0); > > + mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~(MMCR0_FC | MMCR0_FC56)); > > + asm volatile(".long 0x0" ::: "memory"); > > + mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | (MMCR0_FC | MMCR0_FC56)); > > + assert(got_interrupt); > > + got_interrupt = false; > > + pmc5_1 = mfspr(SPR_PMC5); > > + > > + reset_mmcr0(); > > + mtspr(SPR_PMC5, 0); > > + mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~(MMCR0_FC | MMCR0_FC56)); > > + asm volatile(".rep 20 ; nop ; .endr ; .long 0x0" ::: "memory"); > > + mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | (MMCR0_FC | MMCR0_FC56)); > > + assert(got_interrupt); > > + got_interrupt = false; > > + pmc5_2 = mfspr(SPR_PMC5); > > + > > + /* TCG and POWER9 do not count instructions around faults correctly */ > > + report_kfail(true, pmc5_1 + 20 == pmc5_2, "PMC5 counts instructions with fault"); > > It would be nice to have the TCG detection patch before this patch, so you > could use the right condition here right from the start. Yeah, it turned out to be a bit annoying to rebase. We already have some kfail(true in the tree but I will remove those at least toward the end of the series. I might take another look at reordering it after I rebase what you have merged. > > > + handle_exception(0x700, NULL, NULL); > > + handle_exception(0xe40, NULL, NULL); > > +} > > + > > +static void test_pmc5_with_sc(void) > > +{ > > + unsigned long pmc5_1, pmc5_2; > > + > > + handle_exception(0xc00, &sc_handler, NULL); > > + > > + reset_mmcr0(); > > + mtspr(SPR_PMC5, 0); > > + mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~(MMCR0_FC | MMCR0_FC56)); > > + asm volatile("sc 0" ::: "memory"); > > + mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | (MMCR0_FC | MMCR0_FC56)); > > + assert(got_interrupt); > > + got_interrupt = false; > > + pmc5_1 = mfspr(SPR_PMC5); > > + > > + reset_mmcr0(); > > + mtspr(SPR_PMC5, 0); > > + mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~(MMCR0_FC | MMCR0_FC56)); > > + asm volatile(".rep 20 ; nop ; .endr ; sc 0" ::: "memory"); > > + mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | (MMCR0_FC | MMCR0_FC56)); > > + assert(got_interrupt); > > + got_interrupt = false; > > + pmc5_2 = mfspr(SPR_PMC5); > > + > > + /* TCG does not count instructions around syscalls correctly */ > > + report_kfail(true, pmc5_1 + 20 == pmc5_2, "PMC5 counts instructions with syscall"); > > dito > > > + handle_exception(0xc00, NULL, NULL); > > +} > > + > > +static void test_pmc56(void) > > +{ > > + unsigned long tmp; > > + > > + report_prefix_push("pmc56"); > > + > > + reset_mmcr0(); > > + mtspr(SPR_PMC5, 0); > > + mtspr(SPR_PMC6, 0); > > + report(mfspr(SPR_PMC5) == 0, "PMC5 zeroed"); > > + report(mfspr(SPR_PMC6) == 0, "PMC6 zeroed"); > > + mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~MMCR0_FC); > > + msleep(100); > > + report(mfspr(SPR_PMC5) == 0, "PMC5 frozen"); > > + report(mfspr(SPR_PMC6) == 0, "PMC6 frozen"); > > + mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) & ~MMCR0_FC56); > > + mdelay(100); > > + mtspr(SPR_MMCR0, mfspr(SPR_MMCR0) | (MMCR0_FC | MMCR0_FC56)); > > + report(mfspr(SPR_PMC5) != 0, "PMC5 counting"); > > + report(mfspr(SPR_PMC6) != 0, "PMC6 counting"); > > + > > + /* Dynamic frequency scaling could cause to be out, so don't fail. */ > > + tmp = mfspr(SPR_PMC6); > > + report(true, "PMC6 ratio to reported clock frequency is %ld%%", tmp * 1000 / cpu_hz); > > report(true, ...) looks weird. Use report_info() instead? Ah yes that's better. I was going to do a pass/fail threshold but that gets pretty arbitrary depending on DVFS.. I guess for TCG we could report a pass/fail. Thanks, Nick