From: Subhasish Ghosh <subhasish.ghosh@xxxxxxx>

Test that the FPU/SIMD registers are saved and restored correctly when
context switching CPUs.

In order to test fpu/simd functionality, we need to make sure that
kvm-unit-tests doesn't generate code that uses the fpu registers, as that
might interfere with the test results. Thus make sure we compile the tests
with -mgeneral-regs-only.

Signed-off-by: Subhasish Ghosh <subhasish.ghosh@xxxxxxx>
[ Added SVE register tests ]
Signed-off-by: Joey Gouly <joey.gouly@xxxxxxx>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@xxxxxxx>
---
 arm/Makefile.arm64        |   9 +
 arm/cstart64.S            |   1 +
 arm/fpu.c                 | 424 ++++++++++++++++++++++++++++++++++++++
 arm/unittests.cfg         |   8 +
 lib/arm64/asm/processor.h |  26 +++
 lib/arm64/asm/sysreg.h    |   7 +
 6 files changed, 475 insertions(+)
 create mode 100644 arm/fpu.c

diff --git a/arm/Makefile.arm64 b/arm/Makefile.arm64
index 90d95e79..5a9943c8 100644
--- a/arm/Makefile.arm64
+++ b/arm/Makefile.arm64
@@ -10,9 +10,17 @@ arch_LDFLAGS = -pie -n
 arch_LDFLAGS += -z notext
 CFLAGS += -mstrict-align
 
+sve_flag := $(call cc-option, -march=armv8.5-a+sve, "")
+ifneq ($(strip $(sve_flag)),)
+# Don't pass the option to the compiler, we don't
+# want the compiler to generate SVE instructions.
+CFLAGS += -DCC_HAS_SVE
+endif
+
 mno_outline_atomics := $(call cc-option, -mno-outline-atomics, "")
 CFLAGS += $(mno_outline_atomics)
 CFLAGS += -DCONFIG_RELOC
+CFLAGS += -mgeneral-regs-only
 
 define arch_elf_check =
 	$(if $(shell ! $(READELF) -rW $(1) >&/dev/null && echo "nok"),
@@ -49,6 +57,7 @@ tests = $(TEST_DIR)/timer.$(exe)
 tests += $(TEST_DIR)/micro-bench.$(exe)
 tests += $(TEST_DIR)/cache.$(exe)
 tests += $(TEST_DIR)/debug.$(exe)
+tests += $(TEST_DIR)/fpu.$(exe)
 tests += $(TEST_DIR)/realm-rsi.$(exe)
 
 include $(SRCDIR)/$(TEST_DIR)/Makefile.common
diff --git a/arm/cstart64.S b/arm/cstart64.S
index c081365f..53acf796 100644
--- a/arm/cstart64.S
+++ b/arm/cstart64.S
@@ -12,6 +12,7 @@
 #include <asm/ptrace.h>
 #include <asm/page.h>
 #include <asm/pgtable-hwdef.h>
+#include <asm/processor.h>
 #include <asm/thread_info.h>
 #include <asm/sysreg.h>
 #include <asm/smc-rsi.h>
diff --git a/arm/fpu.c b/arm/fpu.c
new file mode 100644
index 00000000..06e5a845
--- /dev/null
+++ b/arm/fpu.c
@@ -0,0 +1,424 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Arm Limited.
+ * All rights reserved.
+ */
+
+#include <libcflat.h>
+#include <asm/smp.h>
+#include <stdlib.h>
+
+#include <asm/rsi.h>
+
+#define CPU0_ID 0
+#define CPU1_ID (CPU0_ID + 1)
+#define CPUS_MAX (CPU1_ID + 1)
+#define FPU_QREG_MAX 32
+#define FPU_RESULT_PASS (-1U)
+
+/*
+ * Write 8 bytes of random data in random. Returns true on success, false on
+ * failure.
+ */
+static inline bool arch_collect_entropy(uint64_t *random)
+{
+	unsigned long ret;
+
+	asm volatile(
+	" mrs %[ptr], " xstr(RNDR) "\n"
+	" cset %[ret], ne\n" /* RNDR sets NZCV to 0b0100 on failure */
+	:
+	[ret] "=r" (ret),
+	[ptr] "=r" (*random)
+	:
+	: "cc"
+	);
+
+	return ret == 1;
+}
+
+#define fpu_reg_read(val) \
+({ \
+	uint64_t *__val = (val); \
+	asm volatile("stp q0, q1, [%0], #32\n\t" \
+		     "stp q2, q3, [%0], #32\n\t" \
+		     "stp q4, q5, [%0], #32\n\t" \
+		     "stp q6, q7, [%0], #32\n\t" \
+		     "stp q8, q9, [%0], #32\n\t" \
+		     "stp q10, q11, [%0], #32\n\t" \
+		     "stp q12, q13, [%0], #32\n\t" \
+		     "stp q14, q15, [%0], #32\n\t" \
+		     "stp q16, q17, [%0], #32\n\t" \
+		     "stp q18, q19, [%0], #32\n\t" \
+		     "stp q20, q21, [%0], #32\n\t" \
+		     "stp q22, q23, [%0], #32\n\t" \
+		     "stp q24, q25, [%0], #32\n\t" \
+		     "stp q26, q27, [%0], #32\n\t" \
+		     "stp q28, q29, [%0], #32\n\t" \
+		     "stp q30, q31, [%0], #32\n\t" \
+		     : "+r" (__val) /* base is read and post-incremented */ \
+		     : \
+		     : "q0", "q1", "q2", "q3", \
+			"q4", "q5", "q6", "q7", \
+			"q8", "q9", "q10", "q11", \
+			"q12", "q13", "q14", \
+			"q15", "q16", "q17", \
+			"q18", "q19", "q20", \
+			"q21", "q22", "q23", \
+			"q24", "q25", "q26", \
+			"q27", "q28", "q29", \
+			"q30", "q31", "memory"); \
+})
+
+#define fpu_reg_write(val) \
+do { \
+	uint64_t *__val = (val); \
+	asm volatile("ldp q0, q1, [%0], #32\n\t" \
+		     "ldp q2, q3, [%0], #32\n\t" \
+		     "ldp q4, q5, [%0], #32\n\t" \
+		     "ldp q6, q7, [%0], #32\n\t" \
+		     "ldp q8, q9, [%0], #32\n\t" \
+		     "ldp q10, q11, [%0], #32\n\t" \
+		     "ldp q12, q13, [%0], #32\n\t" \
+		     "ldp q14, q15, [%0], #32\n\t" \
+		     "ldp q16, q17, [%0], #32\n\t" \
+		     "ldp q18, q19, [%0], #32\n\t" \
+		     "ldp q20, q21, [%0], #32\n\t" \
+		     "ldp q22, q23, [%0], #32\n\t" \
+		     "ldp q24, q25, [%0], #32\n\t" \
+		     "ldp q26, q27, [%0], #32\n\t" \
+		     "ldp q28, q29, [%0], #32\n\t" \
+		     "ldp q30, q31, [%0], #32\n\t" \
+		     : "+r" (__val) /* post-index writeback modifies the base */ \
+		     : \
+		     : "q0", "q1", "q2", "q3", \
+			"q4", "q5", "q6", "q7", \
+			"q8", "q9", "q10", "q11", \
+			"q12", "q13", "q14", \
+			"q15", "q16", "q17", \
+			"q18", "q19", "q20", \
+			"q21", "q22", "q23", \
+			"q24", "q25", "q26", \
+			"q27", "q28", "q29", \
+			"q30", "q31", "memory"); \
+} while (0)
+
+#ifdef CC_HAS_SVE
+#define sve_reg_read(val) \
+({ \
+	uint64_t *__val = (val); \
+	asm volatile(".arch_extension sve\n" \
+		     "str z0, [%0, #0, MUL VL]\n" \
+		     "str z1, [%0, #1, MUL VL]\n" \
+		     "str z2, [%0, #2, MUL VL]\n" \
+		     "str z3, [%0, #3, MUL VL]\n" \
+		     "str z4, [%0, #4, MUL VL]\n" \
+		     "str z5, [%0, #5, MUL VL]\n" \
+		     "str z6, [%0, #6, MUL VL]\n" \
+		     "str z7, [%0, #7, MUL VL]\n" \
+		     "str z8, [%0, #8, MUL VL]\n" \
+		     "str z9, [%0, #9, MUL VL]\n" \
+		     "str z10, [%0, #10, MUL VL]\n" \
+		     "str z11, [%0, #11, MUL VL]\n" \
+		     "str z12, [%0, #12, MUL VL]\n" \
+		     "str z13, [%0, #13, MUL VL]\n" \
+		     "str z14, [%0, #14, MUL VL]\n" \
+		     "str z15, [%0, #15, MUL VL]\n" \
+		     "str z16, [%0, #16, MUL VL]\n" \
+		     "str z17, [%0, #17, MUL VL]\n" \
+		     "str z18, [%0, #18, MUL VL]\n" \
+		     "str z19, [%0, #19, MUL VL]\n" \
+		     "str z20, [%0, #20, MUL VL]\n" \
+		     "str z21, [%0, #21, MUL VL]\n" \
+		     "str z22, [%0, #22, MUL VL]\n" \
+		     "str z23, [%0, #23, MUL VL]\n" \
+		     "str z24, [%0, #24, MUL VL]\n" \
+		     "str z25, [%0, #25, MUL VL]\n" \
+		     "str z26, [%0, #26, MUL VL]\n" \
+		     "str z27, [%0, #27, MUL VL]\n" \
+		     "str z28, [%0, #28, MUL VL]\n" \
+		     "str z29, [%0, #29, MUL VL]\n" \
+		     "str z30, [%0, #30, MUL VL]\n" \
+		     "str z31, [%0, #31, MUL VL]\n" \
+		     : \
+		     : "r" (__val) /* base is only read, never written back */ \
+		     : "z0", "z1", "z2", "z3", \
+			"z4", "z5", "z6", "z7", \
+			"z8", "z9", "z10", "z11", \
+			"z12", "z13", "z14", \
+			"z15", "z16", "z17", \
+			"z18", "z19", "z20", \
+			"z21", "z22", "z23", \
+			"z24", "z25", "z26", \
+			"z27", "z28", "z29", \
+			"z30", "z31", "memory"); \
+})
+
+#define sve_reg_write(val) \
+({ \
+	uint64_t *__val = (val); \
+	asm volatile(".arch_extension sve\n" \
+		     "ldr z0, [%0, #0, MUL VL]\n" \
+		     "ldr z1, [%0, #1, MUL VL]\n" \
+		     "ldr z2, [%0, #2, MUL VL]\n" \
+		     "ldr z3, [%0, #3, MUL VL]\n" \
+		     "ldr z4, [%0, #4, MUL VL]\n" \
+		     "ldr z5, [%0, #5, MUL VL]\n" \
+		     "ldr z6, [%0, #6, MUL VL]\n" \
+		     "ldr z7, [%0, #7, MUL VL]\n" \
+		     "ldr z8, [%0, #8, MUL VL]\n" \
+		     "ldr z9, [%0, #9, MUL VL]\n" \
+		     "ldr z10, [%0, #10, MUL VL]\n" \
+		     "ldr z11, [%0, #11, MUL VL]\n" \
+		     "ldr z12, [%0, #12, MUL VL]\n" \
+		     "ldr z13, [%0, #13, MUL VL]\n" \
+		     "ldr z14, [%0, #14, MUL VL]\n" \
+		     "ldr z15, [%0, #15, MUL VL]\n" \
+		     "ldr z16, [%0, #16, MUL VL]\n" \
+		     "ldr z17, [%0, #17, MUL VL]\n" \
+		     "ldr z18, [%0, #18, MUL VL]\n" \
+		     "ldr z19, [%0, #19, MUL VL]\n" \
+		     "ldr z20, [%0, #20, MUL VL]\n" \
+		     "ldr z21, [%0, #21, MUL VL]\n" \
+		     "ldr z22, [%0, #22, MUL VL]\n" \
+		     "ldr z23, [%0, #23, MUL VL]\n" \
+		     "ldr z24, [%0, #24, MUL VL]\n" \
+		     "ldr z25, [%0, #25, MUL VL]\n" \
+		     "ldr z26, [%0, #26, MUL VL]\n" \
+		     "ldr z27, [%0, #27, MUL VL]\n" \
+		     "ldr z28, [%0, #28, MUL VL]\n" \
+		     "ldr z29, [%0, #29, MUL VL]\n" \
+		     "ldr z30, [%0, #30, MUL VL]\n" \
+		     "ldr z31, [%0, #31, MUL VL]\n" \
+		     : \
+		     : "r" (__val) \
+		     : "z0", "z1", "z2", "z3", \
+			"z4", "z5", "z6", "z7", \
+			"z8", "z9", "z10", "z11", \
+			"z12", "z13", "z14", \
+			"z15", "z16", "z17", \
+			"z18", "z19", "z20", \
+			"z21", "z22", "z23", \
+			"z24", "z25", "z26", \
+			"z27", "z28", "z29", \
+			"z30", "z31", "memory"); \
+})
+#else
+#define sve_reg_read(val) report_abort("SVE: not supported")
+#define sve_reg_write(val) report_abort("SVE: not supported")
+#endif
+
+static void nr_cpu_check(int nr)
+{
+	if (nr_cpus < nr)
+		report_abort("At least %d cpus required", nr);
+}
+
+/**
+ * @brief check if the FPU/SIMD/SVE register contents are the same as
+ * the input data provided. Returns a bitmask with bit N set if reg N matched.
+ */
+static uint32_t __fpuregs_testall(uint64_t *indata, int sve)
+{
+	/* 128b aligned array to read data into */
+	uint64_t outdata[FPU_QREG_MAX * 2]
+			 __attribute__((aligned(sizeof(__uint128_t)))) = {
+			[0 ... ((FPU_QREG_MAX * 2) - 1)] = 0 };
+	uint8_t regcnt = 0;
+	uint32_t result = 0;
+
+	if (indata == NULL)
+		report_abort("invalid data pointer received");
+
+	/* Read data from FPU/SVE registers */
+	if (sve)
+		sve_reg_read(outdata);
+	else
+		fpu_reg_read(outdata);
+
+	/* Check if the data is the same */
+	for (regcnt = 0; regcnt < (FPU_QREG_MAX * 2); regcnt += 2) {
+		if ((outdata[regcnt] != indata[regcnt]) ||
+			(outdata[regcnt + 1] != indata[regcnt + 1])) {
+			report_info(
+			"%s save/restore failed for reg: %c%u expected: %lx_%lx received: %lx_%lx\n",
+			sve ? "SVE" : "FPU/SIMD",
+			sve ? 'z' : 'q',
+			regcnt / 2,
+			indata[regcnt + 1], indata[regcnt],
+			outdata[regcnt + 1], outdata[regcnt]);
+		} else {
+			/* populate a bitmask indicating which
+			 * registers passed/failed (unsigned: bit 31 is used)
+			 */
+			result |= (1U << (regcnt / 2));
+		}
+	}
+
+	return result;
+}
+
+/**
+ * @brief writes randomly sampled data into the FPU/SIMD registers.
+ */
+static void __fpuregs_writeall_random(uint64_t **indata, int sve)
+{
+	/* allocate 128b aligned memory, one 128-bit slot per register */
+	*indata = memalign(sizeof(__uint128_t), sizeof(__uint128_t) * FPU_QREG_MAX);
+
+	if (system_supports_rndr()) {
+		/* Populate memory with random data */
+		for (unsigned int i = 0; i < (FPU_QREG_MAX * 2); i++)
+			while (!arch_collect_entropy(&(*indata)[i])) {}
+	} else {
+		/* Populate memory with data from the counter register */
+		for (unsigned int i = 0; i < (FPU_QREG_MAX * 2); i++)
+			(*indata)[i] = get_cntvct();
+	}
+
+	/* Write data into FPU registers */
+	if (sve)
+		sve_reg_write(*indata);
+	else
+		fpu_reg_write(*indata);
+}
+
+static void fpuregs_writeall_run(void *data)
+{
+	uint64_t **indata = (uint64_t **)data;
+
+	__fpuregs_writeall_random(indata, 0);
+}
+
+static void sveregs_writeall_run(void *data)
+{
+	uint64_t **indata = (uint64_t **)data;
+
+	__fpuregs_writeall_random(indata, 1);
+}
+
+static void fpuregs_testall_run(void *data)
+{
+	uint64_t *indata = (uint64_t *)data;
+	uint32_t result = 0;
+
+	result = __fpuregs_testall(indata, 0);
+	report((result == FPU_RESULT_PASS),
+	       "FPU/SIMD register save/restore mask: 0x%x", result);
+}
+
+static void sveregs_testall_run(void *data)
+{
+	uint64_t *indata = (uint64_t *)data;
+	uint32_t result = 0;
+
+	result = __fpuregs_testall(indata, 1);
+	report((result == FPU_RESULT_PASS),
+	       "SVE register save/restore mask: 0x%x", result);
+}
+
+/**
+ * @brief This test uses two CPUs to test FPU/SIMD save/restore
+ * @details CPU1 writes random data into FPU/SIMD registers,
+ * CPU0 corrupts/overwrites the data and finally CPU1 checks
+ * if the data remains unchanged in its context.
+ */
+static void fpuregs_context_switch_cpu1(int sve)
+{
+	int target = CPU1_ID;
+	uint64_t *indata_remote = NULL;
+	uint64_t *indata_local = NULL;
+
+	/* write data from CPU1 */
+	on_cpu(target, sve ? sveregs_writeall_run
+			   : fpuregs_writeall_run,
+	       &indata_remote);
+
+	/* Overwrite from CPU0 */
+	__fpuregs_writeall_random(&indata_local, sve);
+
+	/* Check data consistency */
+	on_cpu(target, sve ? sveregs_testall_run
+			   : fpuregs_testall_run,
+	       indata_remote);
+
+	free(indata_remote);
+	free(indata_local);
+}
+
+/**
+ * @brief This test uses two CPUs to test FPU/SIMD save/restore
+ * @details CPU0 writes random data into FPU/SIMD registers,
+ * CPU1 corrupts/overwrites the data and finally CPU0 checks if
+ * the data remains unchanged in its context.
+ */
+static void fpuregs_context_switch_cpu0(int sve)
+{
+	int target = CPU1_ID;
+	uint64_t *indata_local = NULL;
+	uint64_t *indata_remote = NULL;
+	uint32_t result = 0;
+
+	/* write data from CPU0 */
+	__fpuregs_writeall_random(&indata_local, sve);
+
+	/* Overwrite from CPU1 */
+	on_cpu(target, sve ? sveregs_writeall_run
+			   : fpuregs_writeall_run,
+	       &indata_remote);
+
+	/* Check data consistency */
+	result = __fpuregs_testall(indata_local, sve);
+	report((result == FPU_RESULT_PASS),
+	       "%s register save/restore mask: 0x%x", sve ? "SVE" : "FPU/SIMD", result);
+
+	free(indata_remote);
+	free(indata_local);
+}
+
+/**
+ * Checks if during context switch, FPU/SIMD registers
+ * are saved/restored.
+ */
+static void fpuregs_context_switch(void)
+{
+	fpuregs_context_switch_cpu0(0);
+	fpuregs_context_switch_cpu1(0);
+}
+
+/**
+ * Checks if during realm context switch, SVE registers
+ * are saved/restored.
+ */
+static void sveregs_context_switch(void)
+{
+	unsigned long zcr = read_sysreg(ZCR_EL1);
+
+	// Set the SVE vector length to 128-bits
+	write_sysreg(zcr & ~ZCR_EL1_LEN, ZCR_EL1);
+
+	fpuregs_context_switch_cpu0(1);
+	fpuregs_context_switch_cpu1(1);
+}
+
+static bool should_run_sve_tests(void)
+{
+#ifdef CC_HAS_SVE
+	if (system_supports_sve())
+		return true;
+#endif
+	return false;
+}
+
+int main(int argc, char **argv)
+{
+	report_prefix_pushf("fpu");
+
+	nr_cpu_check(CPUS_MAX);
+	fpuregs_context_switch();
+
+	if (should_run_sve_tests())
+		sveregs_context_switch();
+
+	return report_summary();
+}
diff --git a/arm/unittests.cfg b/arm/unittests.cfg
index b5be6668..e35e8506 100644
--- a/arm/unittests.cfg
+++ b/arm/unittests.cfg
@@ -303,3 +303,11 @@ groups = nodefault realms
 extra_params = -append 'hvc'
 accel = kvm
 arch = arm64
+
+# FPU/SIMD test
+[fpu-context]
+file = fpu.flat
+smp = 2
+groups = nodefault realms
+accel = kvm
+arch = arm64
diff --git a/lib/arm64/asm/processor.h b/lib/arm64/asm/processor.h
index 320ebaef..cc993c6a 100644
--- a/lib/arm64/asm/processor.h
+++ b/lib/arm64/asm/processor.h
@@ -122,6 +122,8 @@ static inline unsigned long get_id_aa64pfr0_el1(void)
 #define ID_AA64PFR0_EL1_EL3 (0xf << 12)
 #define ID_AA64PFR0_EL1_EL3_NI (0x0 << 12)
 
+#define ID_AA64PFR0_EL1_SVE_SHIFT 32
+
 static inline bool system_supports_granule(size_t granule)
 {
 	u32 shift;
@@ -145,5 +147,29 @@ static inline bool system_supports_granule(size_t granule)
 	return ((mmfr0 >> shift) & 0xf) == val;
 }
 
+static inline bool system_supports_sve(void)
+{
+	return ((get_id_aa64pfr0_el1() >> ID_AA64PFR0_EL1_SVE_SHIFT) & 0xf) != 0;
+}
+
+static inline int sve_vl(void)
+{
+	int vl;
+
+	asm volatile(".arch_extension sve\n"
+		     "rdvl %0, #8"
+		     : "=r" (vl));
+
+	return vl;
+}
+
+
+static inline bool system_supports_rndr(void)
+{
+	u64 id_aa64isar0_el1 = read_sysreg(ID_AA64ISAR0_EL1);
+
+	return ((id_aa64isar0_el1 >> ID_AA64ISAR0_EL1_RNDR_SHIFT) & 0xf) != 0;
+}
+
 #endif /* !__ASSEMBLY__ */
 #endif /* _ASMARM64_PROCESSOR_H_ */
diff --git a/lib/arm64/asm/sysreg.h b/lib/arm64/asm/sysreg.h
index 6cae8b84..f214a4f0 100644
--- a/lib/arm64/asm/sysreg.h
+++ b/lib/arm64/asm/sysreg.h
@@ -73,6 +73,8 @@ asm(
 );
 #endif /* __ASSEMBLY__ */
 
+#define ID_AA64ISAR0_EL1_RNDR_SHIFT 60
+
 #define ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0)
 #define ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5)
 #define ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0)
@@ -102,4 +104,9 @@ asm(
 			 SCTLR_EL1_TSCXT | SCTLR_EL1_EIS | SCTLR_EL1_SPAN | \
 			 SCTLR_EL1_NTLSMD | SCTLR_EL1_LSMAOE)
 
+#define ZCR_EL1 S3_0_C1_C2_0
+#define ZCR_EL1_LEN GENMASK(3, 0)
+
+#define RNDR S3_3_C2_C4_0
+
 #endif /* _ASMARM64_SYSREG_H_ */
-- 
2.34.1