Re: [kvm-unit-tests PATCH v2 2/2] s390x: add exittime tests

Claudio Imbrenda <imbrenda@xxxxxxxxxxxxx> · Thu, 6 Oct 2022 13:15:55 +0200

On Thu,  1 Sep 2022 17:09:56 +0200
Nico Boehr <nrb@xxxxxxxxxxxxx> wrote:

> Add a test to measure the execution time of several instructions. This
> can be helpful in finding performance regressions in hypervisor code.
> 
> All tests are currently reported as PASS, since the baseline for their
> execution time depends on the respective environment and since needs to
> be determined on a case-by-case basis.
> 
> Signed-off-by: Nico Boehr <nrb@xxxxxxxxxxxxx>
> ---
>  s390x/Makefile      |   1 +
>  s390x/exittime.c    | 255 ++++++++++++++++++++++++++++++++++++++++++++
>  s390x/unittests.cfg |   4 +
>  3 files changed, 260 insertions(+)
>  create mode 100644 s390x/exittime.c
> 
> diff --git a/s390x/Makefile b/s390x/Makefile
> index efd5e0c13102..5dcac244767f 100644
> --- a/s390x/Makefile
> +++ b/s390x/Makefile
> @@ -34,6 +34,7 @@ tests += $(TEST_DIR)/migration.elf
>  tests += $(TEST_DIR)/pv-attest.elf
>  tests += $(TEST_DIR)/migration-cmm.elf
>  tests += $(TEST_DIR)/migration-skey.elf
> +tests += $(TEST_DIR)/exittime.elf
>  
>  pv-tests += $(TEST_DIR)/pv-diags.elf
>  
> diff --git a/s390x/exittime.c b/s390x/exittime.c
> new file mode 100644
> index 000000000000..543c82ff3906
> --- /dev/null
> +++ b/s390x/exittime.c
> @@ -0,0 +1,255 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Measure run time of various instructions. Can be used to find runtime
> + * regressions of instructions which cause exits.
> + *
> + * Copyright IBM Corp. 2022
> + *
> + * Authors:
> + *  Nico Boehr <nrb@xxxxxxxxxxxxx>
> + */
> +#include <libcflat.h>
> +#include <smp.h>
> +#include <sclp.h>
> +#include <asm/time.h>
> +#include <asm/sigp.h>
> +#include <asm/interrupt.h>
> +#include <asm/page.h>
> +
> +char pagebuf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
> +
> +static void test_sigp_sense_running(long destcpu)
> +{
> +	smp_sigp(destcpu, SIGP_SENSE_RUNNING, 0, NULL);
> +}
> +
> +static void test_nop(long ignore)
> +{
> +	/* nops don't trap into the hypervisor, so let's test them for reference */
> +	asm volatile("nop" : : : "memory");
> +}
> +
> +static void test_diag9c(long destcpu)
> +{
> +	asm volatile("diag %[destcpu],0,0x9c"
> +		:
> +		: [destcpu] "d" (destcpu)
> +		:

here you leave some unused :

> +	);
> +}
> +
> +static long setup_get_this_cpuaddr(long ignore)
> +{
> +	return stap();
> +}
> +
> +static void test_diag44(long ignore)
> +{
> +	asm volatile("diag 0,0,0x44");
> +}
> +
> +static void test_stnsm(long ignore)
> +{
> +	int out;
> +
> +	asm volatile(
> +		"stnsm %[out],0xff"
> +		: [out] "=Q" (out)
> +		:

here too, but you are using only 2

> +	);
> +}
> +
> +static void test_stosm(long ignore)
> +{
> +	int out;
> +
> +	asm volatile(
> +		"stosm %[out],0"
> +		: [out] "=Q" (out)
> +		:
> +	);
> +}
> +
> +static long setup_ssm(long ignore)
> +{
> +	long system_mask = 0;
> +
> +	asm volatile(
> +		"stosm %[system_mask],0"
> +		: [system_mask] "=Q" (system_mask)
> +		:
> +		:
> +	);
> +
> +	return system_mask;
> +}
> +
> +static void test_ssm(long old_system_mask)
> +{
> +	asm volatile(
> +		"ssm %[old_system_mask]"
> +		:
> +		: [old_system_mask] "Q" (old_system_mask)
> +		:
> +	);
> +}
> +
> +static long setup_lctl4(long ignore)
> +{
> +	long ctl4_orig = 0;
> +
> +	asm volatile(
> +		"stctg 4,4,%[ctl4_orig]"
> +		: [ctl4_orig] "=S" (ctl4_orig)
> +		:
> +		:
> +	);
> +
> +	return ctl4_orig;
> +}
> +
> +static void test_lctl4(long ctl4_orig)
> +{
> +	asm volatile(
> +		"lctlg 4,4,%[ctl4_orig]"
> +		:
> +		: [ctl4_orig] "S" (ctl4_orig)
> +		:
> +	);
> +}
> +
> +static void test_stpx(long ignore)
> +{
> +	unsigned int prefix;
> +
> +	asm volatile(
> +		"stpx %[prefix]"
> +		: [prefix] "=Q" (prefix)

here you are only using the : you actually need

> +	);
> +}
> +
> +static void test_stfl(long ignore)
> +{
> +	asm volatile(
> +		"stfl 0" : : : "memory"
> +	);
> +}
> +
> +static void test_epsw(long ignore)
> +{
> +	long r1, r2;
> +
> +	asm volatile(
> +		"epsw %[r1], %[r2]"
> +		: [r1] "=d" (r1), [r2] "=d" (r2)
> +		:
> +		:
> +	);
> +}
> +
> +static void test_illegal(long ignore)
> +{
> +	expect_pgm_int();
> +	asm volatile(
> +		".word 0"
> +		:
> +		:
> +		:

here none are needed

> +	);
> +	clear_pgm_int();
> +}

decide how you want to do for the : and then do it uniformly.

either you always put all three : (except when none are needed), or you
always only put as many as needed, without empty trailing ones.

> +
> +static long setup_servc(long arg)
> +{
> +	memset(pagebuf, 0, PAGE_SIZE);
> +	return arg;
> +}
> +
> +static void test_servc(long ignore)
> +{
> +	SCCB *sccb = (SCCB *) pagebuf;
> +
> +	sccb->h.length = 8;
> +	servc(0, (unsigned long) sccb);
> +}
> +
> +static void test_stsi(long fc)
> +{
> +	stsi(pagebuf, fc, 2, 2);
> +}
> +
> +struct test {
> +	const char *name;
> +	/*
> +	 * When non-null, will be called once before running the test loop.
> +	 * Its return value will be given as argument to testfunc.
> +	 */
> +	long (*setupfunc)(long arg);
> +	void (*testfunc)(long arg);
> +	long arg;
> +	long iters;
> +} const exittime_tests[] = {
> +	{"nop",                   NULL,                   test_nop,                0, 200000 },
> +	{"sigp sense running(0)", NULL,                   test_sigp_sense_running, 0, 20000 },
> +	{"sigp sense running(1)", NULL,                   test_sigp_sense_running, 1, 20000 },
> +	{"diag9c(self)",          setup_get_this_cpuaddr, test_diag9c,             0, 2000 },
> +	{"diag9c(0)",             NULL,                   test_diag9c,             0, 2000 },
> +	{"diag9c(1)",             NULL,                   test_diag9c,             1, 2000 },
> +	{"diag44",                NULL,                   test_diag44,             0, 2000 },
> +	{"stnsm",                 NULL,                   test_stnsm,              0, 200000 },
> +	{"stosm",                 NULL,                   test_stosm,              0, 200000 },
> +	{"ssm",                   setup_ssm,              test_ssm,                0, 200000 },
> +	{"lctl4",                 setup_lctl4,            test_lctl4,              0, 20000 },
> +	{"stpx",                  NULL,                   test_stpx,               0, 2000 },
> +	{"stfl",                  NULL,                   test_stfl,               0, 2000 },
> +	{"epsw",                  NULL,                   test_epsw,               0, 20000 },
> +	{"illegal",               NULL,                   test_illegal,            0, 2000 },
> +	{"servc",                 setup_servc,            test_servc,              0, 2000 },
> +	{"stsi122",               NULL,                   test_stsi,               1, 200 },
> +	{"stsi222",               NULL,                   test_stsi,               2, 200 },
> +	{"stsi322",               NULL,                   test_stsi,               3, 200 },
> +};
> +
> +static uint64_t tod_to_us(uint64_t tod)
> +{
> +	return tod >> STCK_SHIFT_US;
> +}
> +
> +int main(void)
> +{
> +	int i, j, k, testfunc_arg;
> +	const int outer_iters = 100;
> +	struct test const *current_test;
> +	uint64_t start, end, elapsed, worst, best, total;
> +
> +	report_prefix_push("exittime");
> +	report_pass("reporting total/best/worst of %d outer iterations", outer_iters);
> +
> +	for (i = 0; i < ARRAY_SIZE(exittime_tests); i++) {
> +		current_test = &exittime_tests[i];
> +		total = 0;
> +		worst = 0;
> +		best = -1;
> +		report_prefix_pushf("%s", current_test->name);
> +
> +		testfunc_arg = current_test->arg;
> +		if (current_test->setupfunc)
> +			testfunc_arg = current_test->setupfunc(testfunc_arg);
> +
> +		for (j = 0; j < outer_iters; j++) {
> +			start = get_clock_fast();
> +			for (k = 0; k < current_test->iters; k++)
> +				current_test->testfunc(testfunc_arg);
> +			end = get_clock_fast();
> +			elapsed = end - start;
> +			best = MIN(best, elapsed);
> +			worst = MAX(worst, elapsed);
> +			total += elapsed;
> +		}
> +		report_pass("iters/total/best/worst %lu/%lu/%lu/%lu us", current_test->iters, tod_to_us(total), tod_to_us(best), tod_to_us(worst));

perhaps it would not be a bad idea to also print the average and the
standard deviation (σ)

> +		report_prefix_pop();
> +	}
> +
> +	report_prefix_pop();
> +	return report_summary();
> +}
> diff --git a/s390x/unittests.cfg b/s390x/unittests.cfg
> index f7b1fc3dbca1..c11d1d987c82 100644
> --- a/s390x/unittests.cfg
> +++ b/s390x/unittests.cfg
> @@ -185,3 +185,7 @@ groups = migration
>  [migration-skey]
>  file = migration-skey.elf
>  groups = migration
> +
> +[exittime]
> +file = exittime.elf
> +smp = 2