On 3/16/22 05:40, Pengfei Xu wrote: > +#ifndef __cpuid_count > +#define __cpuid_count(level, count, a, b, c, d) ({ \ > + __asm__ __volatile__ ("cpuid\n\t" \ > + : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \ > + : "0" (level), "2" (count)); \ > +}) > +#endif By the time you post the next revision, I hope these are merged: > https://lore.kernel.org/all/cover.1647360971.git.reinette.chatre@xxxxxxxxx/ Could you rebase on top of those to avoid duplication, please? > +/* > + * While this function prototype is in the stdlib.h, the header file cannot be > + * included with the -mno-sse option. > + */ > +void *aligned_alloc(size_t alignment, size_t size); That is worrisome. I would serioulsy suspect that if the header can't be included that the code that it references has issues as well. Or, is this just working around a compiler bug? Just googling: https://www.google.com/search?q=stdlib.h+no-sse points to this: https://sourceware.org/bugzilla/show_bug.cgi?id=27600 > +enum supportability { > + NOT_SUPPORT, > + SUPPORT, > +}; > + > +/* It's from arch/x86/kernel/fpu/xstate.c. */ > +static const char * const xfeature_names[] = { > + "x87 floating point registers", > + "SSE registers", > + "AVX registers", > + "MPX bounds registers", > + "MPX CSR", > + "AVX-512 opmask", > + "AVX-512 Hi256", > + "AVX-512 ZMM_Hi256", > + "Processor Trace (unused)", > + "Protection Keys User registers", > + "PASID state", > + "unknown xstate feature", > + "unknown xstate feature", > + "unknown xstate feature", > + "unknown xstate feature", > + "unknown xstate feature", > + "unknown xstate feature", > + "AMX Tile config", > + "AMX Tile data", > + "unknown xstate feature", > +}; > + > +/* List of XSAVE features Linux knows about. */ > +enum xfeature { > + XFEATURE_FP, > + XFEATURE_SSE, > + /* > + * Values above here are "legacy states". > + * Those below are "extended states". > + */ > + XFEATURE_YMM, > + XFEATURE_BNDREGS, > + XFEATURE_BNDCSR, > + XFEATURE_OPMASK, > + XFEATURE_ZMM_Hi256, > + XFEATURE_Hi16_ZMM, > + XFEATURE_PT_UNIMPLEMENTED_SO_FAR, > + XFEATURE_PKRU, > + XFEATURE_PASID, > + XFEATURE_RSRVD_COMP_11, > + XFEATURE_RSRVD_COMP_12, > + XFEATURE_RSRVD_COMP_13, > + XFEATURE_RSRVD_COMP_14, > + XFEATURE_LBR, > + XFEATURE_RSRVD_COMP_16, > + XFEATURE_XTILE_CFG, > + XFEATURE_XTILE_DATA, > + XFEATURE_MAX, > +}; > + > +struct xsave_buffer { > + union { > + struct { > + char legacy[XSAVE_HDR_OFFSET]; > + char header[XSAVE_HDR_SIZE]; > + char extended[0]; > + }; > + char bytes[0]; > + }; > +}; > + > +static struct { > + uint64_t mask; > + uint32_t size[XFEATURE_MAX]; > + uint32_t offset[XFEATURE_MAX]; > +} xstate_info; > + > +static inline void check_cpuid_xsave_availability(void) > +{ > + uint32_t eax, ebx, ecx, edx; > + > + /* > + * CPUID.1:ECX.XSAVE[bit 26] enumerates general > + * support for the XSAVE feature set, including > + * XGETBV. > + */ > + __cpuid_count(1, 0, eax, ebx, ecx, edx); > + if (!(ecx & CPUID_LEAF1_ECX_XSAVE_MASK)) > + fatal_error("cpuid: no CPU xsave support"); > + if (!(ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK)) > + fatal_error("cpuid: no OS xsave support"); > +} Could you please use the standard selftest macros for these fatal errors? Also, do these indicate that the test failed, or that it just was not able to run? > +static inline bool xstate_tested(int xfeature_num) > +{ > + return !!(xstate_info.mask & (1 << xfeature_num)); > +} > + > +static inline int cpu_has_avx2(void) > +{ > + unsigned int eax, ebx, ecx, edx; > + > + /* CPUID.7.0:EBX.AVX2[bit 5]: the support for AVX2 instructions */ > + __cpuid_count(7, 0, eax, ebx, ecx, edx); > + > + return !!(ebx & CPUID_LEAF7_EBX_AVX2_MASK); > +} > + > +static inline int cpu_has_avx512f(void) > +{ > + unsigned int eax, ebx, ecx, edx; > + > + /* CPUID.7.0:EBX.AVX512F[bit 16]: the support for AVX512F instructions */ > + __cpuid_count(7, 0, eax, ebx, ecx, edx); > + > + return !!(ebx & CPUID_LEAF7_EBX_AVX512F_MASK); > +} > + > +static inline int cpu_has_pkeys(void) > +{ > + unsigned int eax, ebx, ecx, edx; > + > + /* CPUID.7.0:ECX.PKU[bit 3]: the support for PKRU instructions */ > + __cpuid_count(7, 0, eax, ebx, ecx, edx); > + if (!(ecx & CPUID_LEAF7_ECX_PKU_MASK)) > + return NOT_SUPPORT; > + /* CPUID.7.0:ECX.OSPKE[bit 4]: the support for OS set CR4.PKE */ > + if (!(ecx & CPUID_LEAF7_ECX_OSPKE_MASK)) > + return NOT_SUPPORT; > + > + return SUPPORT; > +} You don't need that CPUID_LEAF7_ECX_PKU_MASK check. The OSPKE one is sufficient. > +static uint32_t get_xstate_size(void) > +{ > + uint32_t eax, ebx, ecx, edx; > + > + __cpuid_count(CPUID_LEAF_XSTATE, CPUID_SUBLEAF_XSTATE_USER, eax, ebx, > + ecx, edx); > + /* > + * EBX enumerates the size (in bytes) required by the XSAVE > + * instruction for an XSAVE area containing all the user state > + * components corresponding to bits currently set in XCR0. > + */ > + return ebx; > +} > + > +static struct xsave_buffer *alloc_xbuf(uint32_t buf_size) > +{ > + struct xsave_buffer *xbuf; > + > + /* XSAVE buffer should be 64B-aligned. */ > + xbuf = aligned_alloc(64, buf_size); > + if (!xbuf) > + fatal_error("aligned_alloc()"); > + > + return xbuf; > +} > + > +static inline void __xsave(struct xsave_buffer *xbuf, uint64_t rfbm) > +{ > + uint32_t rfbm_lo = rfbm; > + uint32_t rfbm_hi = rfbm >> 32; > + > + asm volatile("xsave (%%rdi)" > + : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi) > + : "memory"); > +} > + > +static inline void __xrstor(struct xsave_buffer *xbuf, uint64_t rfbm) > +{ > + uint32_t rfbm_lo = rfbm; > + uint32_t rfbm_hi = rfbm >> 32; > + > + asm volatile("xrstor (%%rdi)" > + : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi)); > +} Could you please move all of these generic functions into a common header? Maybe the same one with __cpuid()? > +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), > + int flags) > +{ > + struct sigaction sa; > + > + memset(&sa, 0, sizeof(sa)); > + sa.sa_sigaction = handler; > + sa.sa_flags = SA_SIGINFO | flags; > + sigemptyset(&sa.sa_mask); > + if (sigaction(sig, &sa, 0)) > + fatal_error("sigaction"); > +} > + > +static void clearhandler(int sig) > +{ > + struct sigaction sa; > + > + memset(&sa, 0, sizeof(sa)); > + sa.sa_handler = SIG_DFL; > + sigemptyset(&sa.sa_mask); > + if (sigaction(sig, &sa, 0)) > + fatal_error("sigaction"); > +} > + > +/* Retrieve the offset and size of a specific xstate. */ > +static void retrieve_xstate_size_and_offset(uint32_t xfeature_num) > +{ > + uint32_t eax, ebx, ecx, edx; > + > + __cpuid_count(CPUID_LEAF_XSTATE, xfeature_num, eax, ebx, ecx, edx); > + /* > + * CPUID.(EAX=0xd, ECX=xfeature_num), and output is as follow: > + * eax: xfeature num state component size > + * ebx: xfeature num state component offset in user buffer > + */ > + xstate_info.size[xfeature_num] = eax; > + xstate_info.offset[xfeature_num] = ebx; > +} > + > +static inline void set_xstatebv(struct xsave_buffer *buffer, uint64_t bv) > +{ > + /* XSTATE_BV is at the beginning of xstate header. */ > + *(uint64_t *)(&buffer->header) = bv; > +} > + > +static void check_cpuid_xstate_info(void) > +{ > + /* CPUs that support XSAVE could support FP and SSE by default. */ > + xstate_info.mask = XFEATURE_MASK_FP | XFEATURE_MASK_SSE; > + > + xstate_size = get_xstate_size(); > + if (cpu_has_avx2()) { > + xstate_info.mask |= XFEATURE_MASK_YMM; > + retrieve_xstate_size_and_offset(XFEATURE_YMM); > + } > + > + if (cpu_has_avx512f()) { > + xstate_info.mask |= XFEATURE_MASK_OPMASK | XFEATURE_MASK_ZMM_Hi256 | > + XFEATURE_MASK_Hi16_ZMM; > + retrieve_xstate_size_and_offset(XFEATURE_OPMASK); > + retrieve_xstate_size_and_offset(XFEATURE_ZMM_Hi256); > + retrieve_xstate_size_and_offset(XFEATURE_Hi16_ZMM); > + } > + > + if (cpu_has_pkeys()) { > + xstate_info.mask |= XFEATURE_MASK_PKRU; > + retrieve_xstate_size_and_offset(XFEATURE_PKRU); > + } > +} > + > +static void fill_xstate_buf(uint8_t test_byte, unsigned char *buf, > + int xfeature_num) > +{ > + uint32_t i; > + > + for (i = 0; i < xstate_info.size[xfeature_num]; i++) > + buf[xstate_info.offset[xfeature_num] + i] = test_byte; > +} > + > +/* Fill FP/XMM/YMM/OPMASK and PKRU xstates into buffer. */ > +static void fill_xstates_buf(struct xsave_buffer *buf, uint32_t xsave_mask) > +{ > + uint32_t i; > + /* The data of FP x87 state are as follows. */ OK, but what *is* this? Random data? Or did you put some data in that means something? > + unsigned char fp_data[160] = { > + 0x7f, 0x03, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, > + 0xf0, 0x19, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, > + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, > + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, > + 0x00, 0x78, 0xfa, 0x79, 0xf9, 0x78, 0xfa, 0xf9, > + 0xf2, 0x3d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, > + 0x00, 0x78, 0xfa, 0x79, 0xf9, 0x78, 0xfa, 0xf9, > + 0xf2, 0x3d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, > + 0x00, 0x78, 0xfa, 0x79, 0xf9, 0x78, 0xfa, 0xf9, > + 0xf2, 0x3d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, > + 0x00, 0x78, 0xfa, 0x79, 0xf9, 0x78, 0xfa, 0xf9, > + 0xf2, 0x3d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, > + 0x00, 0x78, 0xfa, 0x79, 0xf9, 0x78, 0xfa, 0xf9, > + 0xf2, 0x3d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, > + 0x00, 0x78, 0xfa, 0x79, 0xf9, 0x78, 0xfa, 0xf9, > + 0xf2, 0x3d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, > + 0x00, 0x78, 0xfa, 0x79, 0xf9, 0x78, 0xfa, 0xf9, > + 0xf2, 0x3d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, > + 0x00, 0x78, 0xfa, 0x79, 0xf9, 0x78, 0xfa, 0xf9, > + 0xf2, 0x3d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; > + > + /* Clean the buffer with all 0 first. */ > + memset(buf, 0, xstate_size); > + > + /* Fill fp x87 state: MXCSR and MXCSR_MASK data(0-159 bytes) into buffer. */ > + memcpy(buf, fp_data, FP_SIZE); > + > + /* > + * Fill test byte value into XMM xstate buffer(160-415 bytes). > + * xstate 416-511 bytes are reserved as 0. > + */ > + for (i = 0; i < XMM_SIZE; i++) > + *((unsigned char *)buf + XMM_OFFSET + i) = XSTATE_TESTBYTE; Isn't this just memset()? > + /* > + * Fill xstate-component bitmap(512-519 bytes) into xstate header. > + * xstate header range is 512-575 bytes. > + */ > + set_xstatebv(buf, xsave_mask); > + > + /* Fill test byte value into YMM xstate buffer(YMM offset/size). */ > + if (xstate_tested(XFEATURE_YMM)) > + fill_xstate_buf(XSTATE_TESTBYTE, (unsigned char *)buf, XFEATURE_YMM); > + > + /* > + * Fill test byte value into AVX512 OPMASK/ZMM xstates buffer > + * (AVX512_OPMASK/ZMM_Hi256/Hi16_ZMM offset/size). > + */ > + if (xstate_tested(XFEATURE_OPMASK)) > + fill_xstate_buf(XSTATE_TESTBYTE, (unsigned char *)buf, XFEATURE_OPMASK); > + if (xstate_tested(XFEATURE_ZMM_Hi256)) { > + fill_xstate_buf(XSTATE_TESTBYTE, (unsigned char *)buf, > + XFEATURE_ZMM_Hi256); > + } > + if (xstate_tested(XFEATURE_Hi16_ZMM)) { > + fill_xstate_buf(XSTATE_TESTBYTE, (unsigned char *)buf, > + XFEATURE_Hi16_ZMM); > + } > + > + if (xstate_tested(XFEATURE_PKRU)) { > + /* Only 0-3 bytes of pkru xstates are allowed to be written. */ > + memset((unsigned char *)buf + xstate_info.offset[XFEATURE_PKRU], > + PKRU_TESTBYTE, sizeof(uint32_t)); > + } > +} I have to wonder if we can do this in a bit more structured way: struct xstate_test { bool (*init)(void); bool (*fill_state)(struct xsave_buffer *buf); ... } Yes, that means indirect function calls, but that's OK since we know it will all be compiled with the "no-sse" flag (and friends). Then fill_xstates_buf() just becomes a loop over an xstate_tests[] array. > +/* > + * Because xstate like XMM, YMM registers are not preserved across function > + * calls, so use inline function with assembly code only for fork syscall. > + */ > +static inline long long __fork(void) > +{ > + long long ret, nr = SYS_fork; Are those the right types? 'long long' is 64-bit on 32-bit systems, iirc. > + asm volatile("syscall" > + : "=a" (ret) > + : "a" (nr), "b" (nr) > + : "rcx", "r11", "memory", "cc"); > + > + return ret; > +} > > +/* > + * Because xstate like XMM, YMM registers are not preserved across function > + * calls, so use inline function with assembly code only to raise signal. > + */ > +static inline long long __raise(long long pid_num, long long sig_num) > +{ > + long long ret, nr = SYS_kill; > + > + register long long arg1 asm("rdi") = pid_num; > + register long long arg2 asm("rsi") = sig_num; I really don't like register variables. They're very fragile. Imagine if someone put a printf() right here. Why don't you just do this with inline assembly directives? > + asm volatile("syscall" > + : "=a" (ret) > + : "a" (nr), "b" (nr), "r" (arg1), "r" (arg2) > + : "rcx", "r11", "memory", "cc"); > + > + return ret; > +} > + > +static void sigusr1_handler(int signum, siginfo_t *info, void *__ctxp) > +{ > + sigusr1_done = true; > +} > + > +static void test_xstate_sig_handle(void) > +{ > + pid_t process_pid; > + > + sethandler(SIGUSR1, sigusr1_handler, 0); > + printf("[RUN]\tCheck xstate around signal handling test.\n"); > + process_pid = getpid(); > + > + /* > + * Xrstor the valid_xbuf and call syscall assembly instruction, then > + * save the xstate to compared_xbuf after signal handling for comparison. > + */ > + __xrstor(valid_xbuf, xstate_info.mask); > + __raise(process_pid, SIGUSR1); > + __xsave(compared_xbuf, xstate_info.mask); > + if (sigusr1_done == true) > + printf("[NOTE]\tSIGUSR1 handling is done.\n"); > + else > + fatal_error("Didn't access SIGUSR1 handling after raised SIGUSR1"); > + > + if (memcmp(&valid_xbuf->bytes[0], &compared_xbuf->bytes[0], xstate_size)) > + printf("[FAIL]\tProcess xstate is not same after signal handling\n"); > + else > + printf("[PASS]\tProcess xstate is same after signal handling.\n"); > + > + clearhandler(SIGUSR1); > +} > + > +static void test_xstate_fork(void) > +{ > + pid_t child; > + int status; > + > + printf("[RUN]\tParent pid:%d check xstate around fork test.\n", getpid()); > + memset(compared_xbuf, 0, xstate_size); > + > + /* > + * Xrstor the valid_xbuf and call syscall assembly instruction, then > + * save the xstate to compared_xbuf in child process for comparison. > + */ > + __xrstor(valid_xbuf, xstate_info.mask); > + child = __fork(); > + if (child < 0) { > + /* Fork syscall failed */ > + fatal_error("fork failed"); > + } else if (child == 0) { > + /* Fork syscall succeeded, now in the child. */ > + __xsave(compared_xbuf, xstate_info.mask); > + if (memcmp(&valid_xbuf->bytes[0], &compared_xbuf->bytes[0], > + xstate_size)) { > + printf("[FAIL]\tXstate of child process:%d is not same as xstate of parent\n", > + getpid()); > + } else { > + printf("[PASS]\tXstate of child process:%d is same as xstate of parent.\n", > + getpid()); > + } > + } else { > + if (waitpid(child, &status, 0) != child || !WIFEXITED(status)) > + fatal_error("Child exit with error status"); It also couldn't hurt to check the XSAVE state in the parent.