From: Eric Biggers <ebiggers@xxxxxxxxxx>

Lift zmm_exclusion_list in aesni-intel_glue.c into the x86 CPU setup
code, and add a new x86 CPU feature flag X86_FEATURE_PREFER_YMM that is
set when the CPU is on this list.

This allows other code in arch/x86/, such as the CRC library code, to
apply the same exclusion list when deciding whether to execute 256-bit
or 512-bit optimized functions.

Note that full AVX512 support including zmm registers is still exposed
to userspace and is still supported for in-kernel use.  This flag just
indicates whether in-kernel code should prefer to use ymm registers.

Signed-off-by: Eric Biggers <ebiggers@xxxxxxxxxx>
---
 arch/x86/crypto/aesni-intel_glue.c | 22 +---------------------
 arch/x86/include/asm/cpufeatures.h |  1 +
 arch/x86/kernel/cpu/intel.c        | 22 ++++++++++++++++++++++
 3 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index fbf43482e1f5e..8e648abfb5ab8 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -1534,30 +1534,10 @@ DEFINE_GCM_ALGS(vaes_avx10_256, FLAG_AVX10_256,
 DEFINE_GCM_ALGS(vaes_avx10_512, FLAG_AVX10_512,
		"generic-gcm-vaes-avx10_512", "rfc4106-gcm-vaes-avx10_512",
		AES_GCM_KEY_AVX10_SIZE, 800);
 #endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
 
-/*
- * This is a list of CPU models that are known to suffer from downclocking when
- * zmm registers (512-bit vectors) are used.  On these CPUs, the AES mode
- * implementations with zmm registers won't be used by default.  Implementations
- * with ymm registers (256-bit vectors) will be used by default instead.
- */
-static const struct x86_cpu_id zmm_exclusion_list[] = {
-	X86_MATCH_VFM(INTEL_SKYLAKE_X, 0),
-	X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
-	X86_MATCH_VFM(INTEL_ICELAKE_D, 0),
-	X86_MATCH_VFM(INTEL_ICELAKE, 0),
-	X86_MATCH_VFM(INTEL_ICELAKE_L, 0),
-	X86_MATCH_VFM(INTEL_ICELAKE_NNPI, 0),
-	X86_MATCH_VFM(INTEL_TIGERLAKE_L, 0),
-	X86_MATCH_VFM(INTEL_TIGERLAKE, 0),
-	/* Allow Rocket Lake and later, and Sapphire Rapids and later. */
-	/* Also allow AMD CPUs (starting with Zen 4, the first with AVX-512). */
-	{},
-};
-
 static int __init register_avx_algs(void)
 {
 	int err;
 
 	if (!boot_cpu_has(X86_FEATURE_AVX))
@@ -1598,11 +1578,11 @@ static int __init register_avx_algs(void)
			ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256),
			aes_gcm_simdalgs_vaes_avx10_256);
 	if (err)
 		return err;
 
-	if (x86_match_cpu(zmm_exclusion_list)) {
+	if (boot_cpu_has(X86_FEATURE_PREFER_YMM)) {
 		int i;
 
 		aes_xts_alg_vaes_avx10_512.base.cra_priority = 1;
 		for (i = 0; i < ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512); i++)
			aes_gcm_algs_vaes_avx10_512[i].base.cra_priority = 1;
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 17b6590748c00..948bfa25ccc7b 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -477,10 +477,11 @@
 #define X86_FEATURE_CLEAR_BHB_HW	(21*32+ 3) /* BHI_DIS_S HW control enabled */
 #define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */
 #define X86_FEATURE_AMD_FAST_CPPC	(21*32 + 5) /* Fast CPPC */
 #define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */
 #define X86_FEATURE_AMD_WORKLOAD_CLASS	(21*32 + 7) /* Workload Classification */
+#define X86_FEATURE_PREFER_YMM		(21*32 + 8) /* Avoid zmm registers due to downclocking */
 
 /*
  * BUG word(s)
  */
 #define X86_BUG(x)			(NCAPINTS*32 + (x))
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index d1de300af1737..0beb44c4ac026 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -519,10 +519,29 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c)
 	msr = this_cpu_read(msr_misc_features_shadow);
 	wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
 }
 
+/*
+ * This is a list of Intel CPUs that are known to suffer from downclocking when
+ * zmm registers (512-bit vectors) are used.  On these CPUs, when the kernel
+ * executes SIMD-optimized code such as cryptography functions or CRCs, it
+ * should prefer 256-bit (ymm) code to 512-bit (zmm) code.
+ */
+static const struct x86_cpu_id zmm_exclusion_list[] = {
+	X86_MATCH_VFM(INTEL_SKYLAKE_X, 0),
+	X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
+	X86_MATCH_VFM(INTEL_ICELAKE_D, 0),
+	X86_MATCH_VFM(INTEL_ICELAKE, 0),
+	X86_MATCH_VFM(INTEL_ICELAKE_L, 0),
+	X86_MATCH_VFM(INTEL_ICELAKE_NNPI, 0),
+	X86_MATCH_VFM(INTEL_TIGERLAKE_L, 0),
+	X86_MATCH_VFM(INTEL_TIGERLAKE, 0),
+	/* Allow Rocket Lake and later, and Sapphire Rapids and later. */
+	{},
+};
+
 static void init_intel(struct cpuinfo_x86 *c)
 {
 	early_init_intel(c);
 
 	intel_workarounds(c);
@@ -602,10 +621,13 @@ static void init_intel(struct cpuinfo_x86 *c)
 		set_cpu_cap(c, X86_FEATURE_P4);
 	if (c->x86 == 6)
 		set_cpu_cap(c, X86_FEATURE_P3);
 #endif
 
+	if (x86_match_cpu(zmm_exclusion_list))
+		set_cpu_cap(c, X86_FEATURE_PREFER_YMM);
+
 	/* Work around errata */
 	srat_detect_node(c);
 
 	init_ia32_feat_ctl(c);
-- 
2.47.0
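
P.S. To illustrate the intended usage pattern for other arch/x86 code
(e.g. the CRC library mentioned above), here is a minimal sketch of a
caller checking the new flag.  The helper name crc_want_zmm() and the
AVX512VL gate are illustrative assumptions, not part of this series:

	/*
	 * Hypothetical helper, illustrative only: select the 512-bit
	 * (zmm) code path only when AVX-512 is available and the CPU
	 * is not known to downclock with 512-bit vectors.
	 */
	static bool crc_want_zmm(void)
	{
		return boot_cpu_has(X86_FEATURE_AVX512VL) &&
		       !boot_cpu_has(X86_FEATURE_PREFER_YMM);
	}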