Don't register and unregister each of the functions from least- to most-optimized (e.g., SSSE3 then AVX then AVX2); register all variations. This enables selecting those other algorithms if needed, such as for testing with: modprobe tcrypt mode=300 alg=sha512-avx modprobe tcrypt mode=400 alg=sha512-avx Suggested-by: Tim Chen <tim.c.chen@xxxxxxxxxxxxxxx> Suggested-by: Herbert Xu <herbert@xxxxxxxxxxxxxxxxxxx> Signed-off-by: Robert Elliott <elliott@xxxxxxx> --- v3 register all the variations, not just the best one, per Herbert's feedback. return -ENODEV if none are successful, 0 if any are successful v4 remove driver_name strings that are only used by later patches no longer included in this series that enhance the prints. A future patch series might remove existing prints rather than add and enhance them. Reported-by: kernel test robot <lkp@xxxxxxxxx> --- arch/x86/crypto/sha1_ssse3_glue.c | 132 +++++++++++++-------------- arch/x86/crypto/sha256_ssse3_glue.c | 136 +++++++++++++--------------- arch/x86/crypto/sha512_ssse3_glue.c | 99 +++++++++----------- 3 files changed, 168 insertions(+), 199 deletions(-) diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 4bc77c84b0fb..e75a1060bb5f 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -34,6 +34,13 @@ static const unsigned int bytes_per_fpu_avx2 = 34 * 1024; static const unsigned int bytes_per_fpu_avx = 30 * 1024; static const unsigned int bytes_per_fpu_ssse3 = 26 * 1024; +static int using_x86_ssse3; +static int using_x86_avx; +static int using_x86_avx2; +#ifdef CONFIG_AS_SHA1_NI +static int using_x86_shani; +#endif + static int sha1_update(struct shash_desc *desc, const u8 *data, unsigned int len, unsigned int bytes_per_fpu, sha1_block_fn *sha1_xform) @@ -128,17 +135,12 @@ static struct shash_alg sha1_ssse3_alg = { } }; -static int register_sha1_ssse3(void) -{ - if (boot_cpu_has(X86_FEATURE_SSSE3)) - return crypto_register_shash(&sha1_ssse3_alg); - return 0; -} - static void unregister_sha1_ssse3(void) { - if (boot_cpu_has(X86_FEATURE_SSSE3)) + if (using_x86_ssse3) { crypto_unregister_shash(&sha1_ssse3_alg); + using_x86_ssse3 = 0; + } } asmlinkage void sha1_transform_avx(struct sha1_state *state, @@ -179,28 +181,12 @@ static struct shash_alg sha1_avx_alg = { } }; -static bool avx_usable(void) -{ - if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { - if (boot_cpu_has(X86_FEATURE_AVX)) - pr_info("AVX detected but unusable.\n"); - return false; - } - - return true; -} - -static int register_sha1_avx(void) -{ - if (avx_usable()) - return crypto_register_shash(&sha1_avx_alg); - return 0; -} - static void unregister_sha1_avx(void) { - if (avx_usable()) + if (using_x86_avx) { crypto_unregister_shash(&sha1_avx_alg); + using_x86_avx = 0; + } } #define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */ @@ -208,16 +194,6 @@ static void unregister_sha1_avx(void) asmlinkage void sha1_transform_avx2(struct sha1_state *state, const u8 *data, int blocks); -static bool avx2_usable(void) -{ - if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) - && boot_cpu_has(X86_FEATURE_BMI1) - && boot_cpu_has(X86_FEATURE_BMI2)) - return true; - - return false; -} - static void sha1_apply_transform_avx2(struct sha1_state *state, const u8 *data, int blocks) { @@ -263,17 +239,12 @@ static struct shash_alg sha1_avx2_alg = { } }; -static int register_sha1_avx2(void) -{ - if (avx2_usable()) - return crypto_register_shash(&sha1_avx2_alg); - return 0; -} - static void unregister_sha1_avx2(void) { - if (avx2_usable()) + if (using_x86_avx2) { crypto_unregister_shash(&sha1_avx2_alg); + using_x86_avx2 = 0; + } } #ifdef CONFIG_AS_SHA1_NI @@ -315,49 +286,70 @@ static struct shash_alg sha1_ni_alg = { } }; -static int register_sha1_ni(void) -{ - if (boot_cpu_has(X86_FEATURE_SHA_NI)) - return crypto_register_shash(&sha1_ni_alg); - return 0; -} - static void unregister_sha1_ni(void) { - if (boot_cpu_has(X86_FEATURE_SHA_NI)) + if (using_x86_shani) { crypto_unregister_shash(&sha1_ni_alg); + using_x86_shani = 0; + } } #else -static inline int register_sha1_ni(void) { return 0; } static inline void unregister_sha1_ni(void) { } #endif static int __init sha1_ssse3_mod_init(void) { - if (register_sha1_ssse3()) - goto fail; + const char *feature_name; + int ret; + +#ifdef CONFIG_AS_SHA1_NI + /* SHA-NI */ + if (boot_cpu_has(X86_FEATURE_SHA_NI)) { - if (register_sha1_avx()) { - unregister_sha1_ssse3(); - goto fail; + ret = crypto_register_shash(&sha1_ni_alg); + if (!ret) + using_x86_shani = 1; } +#endif + + /* AVX2 */ + if (boot_cpu_has(X86_FEATURE_AVX2)) { - if (register_sha1_avx2()) { - unregister_sha1_avx(); - unregister_sha1_ssse3(); - goto fail; + if (boot_cpu_has(X86_FEATURE_BMI1) && + boot_cpu_has(X86_FEATURE_BMI2)) { + + ret = crypto_register_shash(&sha1_avx2_alg); + if (!ret) + using_x86_avx2 = 1; + } } - if (register_sha1_ni()) { - unregister_sha1_avx2(); - unregister_sha1_avx(); - unregister_sha1_ssse3(); - goto fail; + /* AVX */ + if (boot_cpu_has(X86_FEATURE_AVX)) { + + if (cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { + + ret = crypto_register_shash(&sha1_avx_alg); + if (!ret) + using_x86_avx = 1; + } } - return 0; -fail: + /* SSE3 */ + if (boot_cpu_has(X86_FEATURE_SSSE3)) { + ret = crypto_register_shash(&sha1_ssse3_alg); + if (!ret) + using_x86_ssse3 = 1; + } + +#ifdef CONFIG_AS_SHA1_NI + if (using_x86_shani) + return 0; +#endif + if (using_x86_avx2 || using_x86_avx || using_x86_ssse3) + return 0; return -ENODEV; } diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index cdcdf5a80ffe..c6261ede4bae 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -51,6 +51,13 @@ static const unsigned int bytes_per_fpu_ssse3 = 11 * 1024; asmlinkage void sha256_transform_ssse3(struct sha256_state *state, const u8 *data, int blocks); +static int using_x86_ssse3; +static int using_x86_avx; +static int using_x86_avx2; +#ifdef CONFIG_AS_SHA256_NI +static int using_x86_shani; +#endif + static int _sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len, unsigned int bytes_per_fpu, sha256_block_fn *sha256_xform) @@ -156,19 +163,13 @@ static struct shash_alg sha256_ssse3_algs[] = { { } } }; -static int register_sha256_ssse3(void) -{ - if (boot_cpu_has(X86_FEATURE_SSSE3)) - return crypto_register_shashes(sha256_ssse3_algs, - ARRAY_SIZE(sha256_ssse3_algs)); - return 0; -} - static void unregister_sha256_ssse3(void) { - if (boot_cpu_has(X86_FEATURE_SSSE3)) + if (using_x86_ssse3) { crypto_unregister_shashes(sha256_ssse3_algs, ARRAY_SIZE(sha256_ssse3_algs)); + using_x86_ssse3 = 0; + } } asmlinkage void sha256_transform_avx(struct sha256_state *state, @@ -223,30 +224,13 @@ static struct shash_alg sha256_avx_algs[] = { { } } }; -static bool avx_usable(void) -{ - if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { - if (boot_cpu_has(X86_FEATURE_AVX)) - pr_info("AVX detected but unusable.\n"); - return false; - } - - return true; -} - -static int register_sha256_avx(void) -{ - if (avx_usable()) - return crypto_register_shashes(sha256_avx_algs, - ARRAY_SIZE(sha256_avx_algs)); - return 0; -} - static void unregister_sha256_avx(void) { - if (avx_usable()) + if (using_x86_avx) { crypto_unregister_shashes(sha256_avx_algs, ARRAY_SIZE(sha256_avx_algs)); + using_x86_avx = 0; + } } asmlinkage void sha256_transform_rorx(struct sha256_state *state, @@ -301,28 +285,13 @@ static struct shash_alg sha256_avx2_algs[] = { { } } }; -static bool avx2_usable(void) -{ - if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) && - boot_cpu_has(X86_FEATURE_BMI2)) - return true; - - return false; -} - -static int register_sha256_avx2(void) -{ - if (avx2_usable()) - return crypto_register_shashes(sha256_avx2_algs, - ARRAY_SIZE(sha256_avx2_algs)); - return 0; -} - static void unregister_sha256_avx2(void) { - if (avx2_usable()) + if (using_x86_avx2) { crypto_unregister_shashes(sha256_avx2_algs, ARRAY_SIZE(sha256_avx2_algs)); + using_x86_avx2 = 0; + } } #ifdef CONFIG_AS_SHA256_NI @@ -378,51 +347,72 @@ static struct shash_alg sha256_ni_algs[] = { { } } }; -static int register_sha256_ni(void) -{ - if (boot_cpu_has(X86_FEATURE_SHA_NI)) - return crypto_register_shashes(sha256_ni_algs, - ARRAY_SIZE(sha256_ni_algs)); - return 0; -} - static void unregister_sha256_ni(void) { - if (boot_cpu_has(X86_FEATURE_SHA_NI)) + if (using_x86_shani) { crypto_unregister_shashes(sha256_ni_algs, ARRAY_SIZE(sha256_ni_algs)); + using_x86_shani = 0; + } } #else -static inline int register_sha256_ni(void) { return 0; } static inline void unregister_sha256_ni(void) { } #endif static int __init sha256_ssse3_mod_init(void) { - if (register_sha256_ssse3()) - goto fail; + const char *feature_name; + int ret; + +#ifdef CONFIG_AS_SHA256_NI + /* SHA-NI */ + if (boot_cpu_has(X86_FEATURE_SHA_NI)) { - if (register_sha256_avx()) { - unregister_sha256_ssse3(); - goto fail; + ret = crypto_register_shashes(sha256_ni_algs, + ARRAY_SIZE(sha256_ni_algs)); + if (!ret) + using_x86_shani = 1; } +#endif + + /* AVX2 */ + if (boot_cpu_has(X86_FEATURE_AVX2)) { - if (register_sha256_avx2()) { - unregister_sha256_avx(); - unregister_sha256_ssse3(); - goto fail; + if (boot_cpu_has(X86_FEATURE_BMI2)) { + ret = crypto_register_shashes(sha256_avx2_algs, + ARRAY_SIZE(sha256_avx2_algs)); + if (!ret) + using_x86_avx2 = 1; + } } - if (register_sha256_ni()) { - unregister_sha256_avx2(); - unregister_sha256_avx(); - unregister_sha256_ssse3(); - goto fail; + /* AVX */ + if (boot_cpu_has(X86_FEATURE_AVX)) { + + if (cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { + ret = crypto_register_shashes(sha256_avx_algs, + ARRAY_SIZE(sha256_avx_algs)); + if (!ret) + using_x86_avx = 1; + } } - return 0; -fail: + /* SSE3 */ + if (boot_cpu_has(X86_FEATURE_SSSE3)) { + ret = crypto_register_shashes(sha256_ssse3_algs, + ARRAY_SIZE(sha256_ssse3_algs)); + if (!ret) + using_x86_ssse3 = 1; + } + +#ifdef CONFIG_AS_SHA256_NI + if (using_x86_shani) + return 0; +#endif + if (using_x86_avx2 || using_x86_avx || using_x86_ssse3) + return 0; return -ENODEV; } diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index c7036cfe2a7e..feae85933270 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -47,6 +47,10 @@ static const unsigned int bytes_per_fpu_ssse3 = 17 * 1024; asmlinkage void sha512_transform_ssse3(struct sha512_state *state, const u8 *data, int blocks); +static int using_x86_ssse3; +static int using_x86_avx; +static int using_x86_avx2; + static int sha512_update(struct shash_desc *desc, const u8 *data, unsigned int len, unsigned int bytes_per_fpu, sha512_block_fn *sha512_xform) @@ -152,33 +156,17 @@ static struct shash_alg sha512_ssse3_algs[] = { { } } }; -static int register_sha512_ssse3(void) -{ - if (boot_cpu_has(X86_FEATURE_SSSE3)) - return crypto_register_shashes(sha512_ssse3_algs, - ARRAY_SIZE(sha512_ssse3_algs)); - return 0; -} - static void unregister_sha512_ssse3(void) { - if (boot_cpu_has(X86_FEATURE_SSSE3)) + if (using_x86_ssse3) { crypto_unregister_shashes(sha512_ssse3_algs, ARRAY_SIZE(sha512_ssse3_algs)); + using_x86_ssse3 = 0; + } } asmlinkage void sha512_transform_avx(struct sha512_state *state, const u8 *data, int blocks); -static bool avx_usable(void) -{ - if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { - if (boot_cpu_has(X86_FEATURE_AVX)) - pr_info("AVX detected but unusable.\n"); - return false; - } - - return true; -} static int sha512_avx_update(struct shash_desc *desc, const u8 *data, unsigned int len) @@ -230,19 +218,13 @@ static struct shash_alg sha512_avx_algs[] = { { } } }; -static int register_sha512_avx(void) -{ - if (avx_usable()) - return crypto_register_shashes(sha512_avx_algs, - ARRAY_SIZE(sha512_avx_algs)); - return 0; -} - static void unregister_sha512_avx(void) { - if (avx_usable()) + if (using_x86_avx) { crypto_unregister_shashes(sha512_avx_algs, ARRAY_SIZE(sha512_avx_algs)); + using_x86_avx = 0; + } } asmlinkage void sha512_transform_rorx(struct sha512_state *state, @@ -298,22 +280,6 @@ static struct shash_alg sha512_avx2_algs[] = { { } } }; -static bool avx2_usable(void) -{ - if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) && - boot_cpu_has(X86_FEATURE_BMI2)) - return true; - - return false; -} - -static int register_sha512_avx2(void) -{ - if (avx2_usable()) - return crypto_register_shashes(sha512_avx2_algs, - ARRAY_SIZE(sha512_avx2_algs)); - return 0; -} static const struct x86_cpu_id module_cpu_ids[] = { X86_MATCH_FEATURE(X86_FEATURE_AVX2, NULL), X86_MATCH_FEATURE(X86_FEATURE_AVX, NULL), @@ -324,32 +290,53 @@ MODULE_DEVICE_TABLE(x86cpu, module_cpu_ids); static void unregister_sha512_avx2(void) { - if (avx2_usable()) + if (using_x86_avx2) { crypto_unregister_shashes(sha512_avx2_algs, ARRAY_SIZE(sha512_avx2_algs)); + using_x86_avx2 = 0; + } } static int __init sha512_ssse3_mod_init(void) { + const char *feature_name; + int ret; + if (!x86_match_cpu(module_cpu_ids)) return -ENODEV; - if (register_sha512_ssse3()) - goto fail; + /* AVX2 */ + if (boot_cpu_has(X86_FEATURE_AVX2)) { + if (boot_cpu_has(X86_FEATURE_BMI2)) { + ret = crypto_register_shashes(sha512_avx2_algs, + ARRAY_SIZE(sha512_avx2_algs)); + if (!ret) + using_x86_avx2 = 1; + } + } + + /* AVX */ + if (boot_cpu_has(X86_FEATURE_AVX)) { - if (register_sha512_avx()) { - unregister_sha512_ssse3(); - goto fail; + if (cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { + ret = crypto_register_shashes(sha512_avx_algs, + ARRAY_SIZE(sha512_avx_algs)); + if (!ret) + using_x86_avx = 1; + } } - if (register_sha512_avx2()) { - unregister_sha512_avx(); - unregister_sha512_ssse3(); - goto fail; + /* SSE3 */ + if (boot_cpu_has(X86_FEATURE_SSSE3)) { + ret = crypto_register_shashes(sha512_ssse3_algs, + ARRAY_SIZE(sha512_ssse3_algs)); + if (!ret) + using_x86_ssse3 = 1; } - return 0; -fail: + if (using_x86_avx2 || using_x86_avx || using_x86_ssse3) + return 0; return -ENODEV; } -- 2.38.1