On Thu, 1 Sep 2022 17:09:56 +0200 Nico Boehr <nrb@xxxxxxxxxxxxx> wrote: > Add a test to measure the execution time of several instructions. This > can be helpful in finding performance regressions in hypervisor code. > > All tests are currently reported as PASS, since the baseline for their > execution time depends on the respective environment and since needs to > be determined on a case-by-case basis. > > Signed-off-by: Nico Boehr <nrb@xxxxxxxxxxxxx> > --- > s390x/Makefile | 1 + > s390x/exittime.c | 255 ++++++++++++++++++++++++++++++++++++++++++++ > s390x/unittests.cfg | 4 + > 3 files changed, 260 insertions(+) > create mode 100644 s390x/exittime.c > > diff --git a/s390x/Makefile b/s390x/Makefile > index efd5e0c13102..5dcac244767f 100644 > --- a/s390x/Makefile > +++ b/s390x/Makefile > @@ -34,6 +34,7 @@ tests += $(TEST_DIR)/migration.elf > tests += $(TEST_DIR)/pv-attest.elf > tests += $(TEST_DIR)/migration-cmm.elf > tests += $(TEST_DIR)/migration-skey.elf > +tests += $(TEST_DIR)/exittime.elf > > pv-tests += $(TEST_DIR)/pv-diags.elf > > diff --git a/s390x/exittime.c b/s390x/exittime.c > new file mode 100644 > index 000000000000..543c82ff3906 > --- /dev/null > +++ b/s390x/exittime.c > @@ -0,0 +1,255 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > +/* > + * Measure run time of various instructions. Can be used to find runtime > + * regressions of instructions which cause exits. > + * > + * Copyright IBM Corp. 
2022 > + * > + * Authors: > + * Nico Boehr <nrb@xxxxxxxxxxxxx> > + */ > +#include <libcflat.h> > +#include <smp.h> > +#include <sclp.h> > +#include <asm/time.h> > +#include <asm/sigp.h> > +#include <asm/interrupt.h> > +#include <asm/page.h> > + > +char pagebuf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); > + > +static void test_sigp_sense_running(long destcpu) > +{ > + smp_sigp(destcpu, SIGP_SENSE_RUNNING, 0, NULL); > +} > + > +static void test_nop(long ignore) > +{ > + /* nops don't trap into the hypervisor, so let's test them for reference */ > + asm volatile("nop" : : : "memory"); > +} > + > +static void test_diag9c(long destcpu) > +{ > + asm volatile("diag %[destcpu],0,0x9c" > + : > + : [destcpu] "d" (destcpu) > + : here you leave some unused : > + ); > +} > + > +static long setup_get_this_cpuaddr(long ignore) > +{ > + return stap(); > +} > + > +static void test_diag44(long ignore) > +{ > + asm volatile("diag 0,0,0x44"); > +} > + > +static void test_stnsm(long ignore) > +{ > + int out; > + > + asm volatile( > + "stnsm %[out],0xff" > + : [out] "=Q" (out) > + : here too, but you are using only 2 > + ); > +} > + > +static void test_stosm(long ignore) > +{ > + int out; > + > + asm volatile( > + "stosm %[out],0" > + : [out] "=Q" (out) > + : > + ); > +} > + > +static long setup_ssm(long ignore) > +{ > + long system_mask = 0; > + > + asm volatile( > + "stosm %[system_mask],0" > + : [system_mask] "=Q" (system_mask) > + : > + : > + ); > + > + return system_mask; > +} > + > +static void test_ssm(long old_system_mask) > +{ > + asm volatile( > + "ssm %[old_system_mask]" > + : > + : [old_system_mask] "Q" (old_system_mask) > + : > + ); > +} > + > +static long setup_lctl4(long ignore) > +{ > + long ctl4_orig = 0; > + > + asm volatile( > + "stctg 4,4,%[ctl4_orig]" > + : [ctl4_orig] "=S" (ctl4_orig) > + : > + : > + ); > + > + return ctl4_orig; > +} > + > +static void test_lctl4(long ctl4_orig) > +{ > + asm volatile( > + "lctlg 4,4,%[ctl4_orig]" > + : > + : [ctl4_orig] 
"S" (ctl4_orig) > + : > + ); > +} > + > +static void test_stpx(long ignore) > +{ > + unsigned int prefix; > + > + asm volatile( > + "stpx %[prefix]" > + : [prefix] "=Q" (prefix) here you are using only the ":" separators you actually need > + ); > +} > + > +static void test_stfl(long ignore) > +{ > + asm volatile( > + "stfl 0" : : : "memory" > + ); > +} > + > +static void test_epsw(long ignore) > +{ > + long r1, r2; > + > + asm volatile( > + "epsw %[r1], %[r2]" > + : [r1] "=d" (r1), [r2] "=d" (r2) > + : > + : > + ); > +} > + > +static void test_illegal(long ignore) > +{ > + expect_pgm_int(); > + asm volatile( > + ".word 0" > + : > + : > + : here none of the ":" separators are needed > + ); > + clear_pgm_int(); > +} Please decide how you want to handle the ":" separators in the asm statements and then do it uniformly: either always put all three ":" (except when none are needed), or always put only as many as are needed, without empty trailing ones. > + > +static long setup_servc(long arg) > +{ > + memset(pagebuf, 0, PAGE_SIZE); > + return arg; > +} > + > +static void test_servc(long ignore) > +{ > + SCCB *sccb = (SCCB *) pagebuf; > + > + sccb->h.length = 8; > + servc(0, (unsigned long) sccb); > +} > + > +static void test_stsi(long fc) > +{ > + stsi(pagebuf, fc, 2, 2); > +} > + > +struct test { > + const char *name; > + /* > + * When non-null, will be called once before running the test loop. > + * Its return value will be given as argument to testfunc. 
> + */ > + long (*setupfunc)(long arg); > + void (*testfunc)(long arg); > + long arg; > + long iters; > +} const exittime_tests[] = { > + {"nop", NULL, test_nop, 0, 200000 }, > + {"sigp sense running(0)", NULL, test_sigp_sense_running, 0, 20000 }, > + {"sigp sense running(1)", NULL, test_sigp_sense_running, 1, 20000 }, > + {"diag9c(self)", setup_get_this_cpuaddr, test_diag9c, 0, 2000 }, > + {"diag9c(0)", NULL, test_diag9c, 0, 2000 }, > + {"diag9c(1)", NULL, test_diag9c, 1, 2000 }, > + {"diag44", NULL, test_diag44, 0, 2000 }, > + {"stnsm", NULL, test_stnsm, 0, 200000 }, > + {"stosm", NULL, test_stosm, 0, 200000 }, > + {"ssm", setup_ssm, test_ssm, 0, 200000 }, > + {"lctl4", setup_lctl4, test_lctl4, 0, 20000 }, > + {"stpx", NULL, test_stpx, 0, 2000 }, > + {"stfl", NULL, test_stfl, 0, 2000 }, > + {"epsw", NULL, test_epsw, 0, 20000 }, > + {"illegal", NULL, test_illegal, 0, 2000 }, > + {"servc", setup_servc, test_servc, 0, 2000 }, > + {"stsi122", NULL, test_stsi, 1, 200 }, > + {"stsi222", NULL, test_stsi, 2, 200 }, > + {"stsi322", NULL, test_stsi, 3, 200 }, > +}; > + > +static uint64_t tod_to_us(uint64_t tod) > +{ > + return tod >> STCK_SHIFT_US; > +} > + > +int main(void) > +{ > + int i, j, k, testfunc_arg; > + const int outer_iters = 100; > + struct test const *current_test; > + uint64_t start, end, elapsed, worst, best, total; > + > + report_prefix_push("exittime"); > + report_pass("reporting total/best/worst of %d outer iterations", outer_iters); > + > + for (i = 0; i < ARRAY_SIZE(exittime_tests); i++) { > + current_test = &exittime_tests[i]; > + total = 0; > + worst = 0; > + best = -1; > + report_prefix_pushf("%s", current_test->name); > + > + testfunc_arg = current_test->arg; > + if (current_test->setupfunc) > + testfunc_arg = current_test->setupfunc(testfunc_arg); > + > + for (j = 0; j < outer_iters; j++) { > + start = get_clock_fast(); > + for (k = 0; k < current_test->iters; k++) > + current_test->testfunc(testfunc_arg); > + end = get_clock_fast(); > + elapsed = 
end - start; > + best = MIN(best, elapsed); > + worst = MAX(worst, elapsed); > + total += elapsed; > + } > + report_pass("iters/total/best/worst %lu/%lu/%lu/%lu us", current_test->iters, tod_to_us(total), tod_to_us(best), tod_to_us(worst)); It might also be a good idea to print the average and the standard deviation (σ) here. > + report_prefix_pop(); > + } > + > + report_prefix_pop(); > + return report_summary(); > +} > diff --git a/s390x/unittests.cfg b/s390x/unittests.cfg > index f7b1fc3dbca1..c11d1d987c82 100644 > --- a/s390x/unittests.cfg > +++ b/s390x/unittests.cfg > @@ -185,3 +185,7 @@ groups = migration > [migration-skey] > file = migration-skey.elf > groups = migration > + > +[exittime] > +file = exittime.elf > +smp = 2