In preparation for extending the x86 ChaCha driver to also expose the
ChaCha library interface, drop the dependency on the chacha_generic
crypto driver as a non-SIMD fallback, and depend on the generic ChaCha
library directly. This way, we only pull in the code we actually need,
without registering a set of ChaCha skciphers that we will never use.

Since turning the FPU on and off is cheap these days, simplify the SIMD
routine by dropping the per-page yield, which makes for a cleaner
switch to the library API as well.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@xxxxxxxxxx>
---
 arch/x86/crypto/chacha_glue.c | 77 ++++++++++----------
 crypto/Kconfig                |  2 +-
 2 files changed, 40 insertions(+), 39 deletions(-)

diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
index bc62daa8dafd..3a1a11a4326d 100644
--- a/arch/x86/crypto/chacha_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -127,32 +127,32 @@
 static int chacha_simd_stream_xor(struct skcipher_walk *walk,
 				  const struct chacha_ctx *ctx, const u8 *iv)
 {
 	u32 *state, state_buf[16 + 2] __aligned(8);
-	int next_yield = 4096; /* bytes until next FPU yield */
+	bool do_simd;
 	int err = 0;
 
 	BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
 	state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
-	crypto_chacha_init(state, ctx, iv);
+	chacha_init_generic(state, ctx->key, iv);
 
+	do_simd = (walk->total > CHACHA_BLOCK_SIZE) && crypto_simd_usable();
 	while (walk->nbytes > 0) {
 		unsigned int nbytes = walk->nbytes;
 
-		if (nbytes < walk->total) {
+		if (nbytes < walk->total)
 			nbytes = round_down(nbytes, walk->stride);
-			next_yield -= nbytes;
-		}
-
-		chacha_dosimd(state, walk->dst.virt.addr, walk->src.virt.addr,
-			      nbytes, ctx->nrounds);
 
-		if (next_yield <= 0) {
-			/* temporarily allow preemption */
-			kernel_fpu_end();
+		if (!do_simd) {
+			chacha_crypt_generic(state, walk->dst.virt.addr,
+					     walk->src.virt.addr, nbytes,
+					     ctx->nrounds);
+		} else {
 			kernel_fpu_begin();
-			next_yield = 4096;
+			chacha_dosimd(state, walk->dst.virt.addr,
+				      walk->src.virt.addr, nbytes,
+				      ctx->nrounds);
+			kernel_fpu_end();
 		}
-
 		err = skcipher_walk_done(walk, walk->nbytes - nbytes);
 	}
 
@@ -164,19 +164,9 @@ static int chacha_simd(struct skcipher_request *req)
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct skcipher_walk walk;
-	int err;
-
-	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
-		return crypto_chacha_crypt(req);
-
-	err = skcipher_walk_virt(&walk, req, true);
-	if (err)
-		return err;
 
-	kernel_fpu_begin();
-	err = chacha_simd_stream_xor(&walk, ctx, req->iv);
-	kernel_fpu_end();
-	return err;
+	return skcipher_walk_virt(&walk, req, true) ?:
+	       chacha_simd_stream_xor(&walk, ctx, req->iv);
 }
 
 static int xchacha_simd(struct skcipher_request *req)
@@ -189,31 +179,42 @@ static int xchacha_simd(struct skcipher_request *req)
 	u8 real_iv[16];
 	int err;
 
-	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
-		return crypto_xchacha_crypt(req);
-
 	err = skcipher_walk_virt(&walk, req, true);
 	if (err)
 		return err;
 
 	BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
 	state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
-	crypto_chacha_init(state, ctx, req->iv);
-
-	kernel_fpu_begin();
-
-	hchacha_block_ssse3(state, subctx.key, ctx->nrounds);
+	chacha_init_generic(state, ctx->key, req->iv);
+
+	if (req->cryptlen > CHACHA_BLOCK_SIZE && crypto_simd_usable()) {
+		kernel_fpu_begin();
+		hchacha_block_ssse3(state, subctx.key, ctx->nrounds);
+		kernel_fpu_end();
+	} else {
+		hchacha_block_generic(state, subctx.key, ctx->nrounds);
+	}
 	subctx.nrounds = ctx->nrounds;
 
 	memcpy(&real_iv[0], req->iv + 24, 8);
 	memcpy(&real_iv[8], req->iv + 16, 8);
 	err = chacha_simd_stream_xor(&walk, &subctx, real_iv);
 
-	kernel_fpu_end();
-
 	return err;
 }
 
+static int chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			   unsigned int keysize)
+{
+	return chacha_setkey(tfm, key, keysize, 20);
+}
+
+static int chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			   unsigned int keysize)
+{
+	return chacha_setkey(tfm, key, keysize, 12);
+}
+
 static struct skcipher_alg algs[] = {
 	{
 		.base.cra_name		= "chacha20",
@@ -227,7 +228,7 @@ static struct skcipher_alg algs[] = {
 		.max_keysize		= CHACHA_KEY_SIZE,
 		.ivsize			= CHACHA_IV_SIZE,
 		.chunksize		= CHACHA_BLOCK_SIZE,
-		.setkey			= crypto_chacha20_setkey,
+		.setkey			= chacha20_setkey,
 		.encrypt		= chacha_simd,
 		.decrypt		= chacha_simd,
 	}, {
@@ -242,7 +243,7 @@ static struct skcipher_alg algs[] = {
 		.max_keysize		= CHACHA_KEY_SIZE,
 		.ivsize			= XCHACHA_IV_SIZE,
 		.chunksize		= CHACHA_BLOCK_SIZE,
-		.setkey			= crypto_chacha20_setkey,
+		.setkey			= chacha20_setkey,
 		.encrypt		= xchacha_simd,
 		.decrypt		= xchacha_simd,
 	}, {
@@ -257,7 +258,7 @@ static struct skcipher_alg algs[] = {
 		.max_keysize		= CHACHA_KEY_SIZE,
 		.ivsize			= XCHACHA_IV_SIZE,
 		.chunksize		= CHACHA_BLOCK_SIZE,
-		.setkey			= crypto_chacha12_setkey,
+		.setkey			= chacha12_setkey,
 		.encrypt		= xchacha_simd,
 		.decrypt		= xchacha_simd,
 	},
diff --git a/crypto/Kconfig b/crypto/Kconfig
index b39ca79ef65f..86732709b171 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1439,7 +1439,7 @@ config CRYPTO_CHACHA20_X86_64
 	tristate "ChaCha stream cipher algorithms (x86_64/SSSE3/AVX2/AVX-512VL)"
 	depends on X86 && 64BIT
 	select CRYPTO_BLKCIPHER
-	select CRYPTO_CHACHA20
+	select CRYPTO_LIB_CHACHA_GENERIC
 	help
 	  SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20,
 	  XChaCha20, and XChaCha12 stream ciphers.
-- 
2.20.1
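
As a note for reviewers, the control flow that replaces the per-page
yield boils down to the pattern below. This is a minimal sketch, not
part of the patch: it assumes it sits inside chacha_glue.c next to the
driver-internal chacha_dosimd() routine, and the helper name
chacha_do_step() is made up purely for illustration.

#include <crypto/chacha.h>		/* chacha_crypt_generic() */
#include <crypto/internal/simd.h>	/* crypto_simd_usable() */
#include <asm/fpu/api.h>		/* kernel_fpu_begin()/kernel_fpu_end() */

static void chacha_do_step(u32 *state, u8 *dst, const u8 *src,
			   unsigned int nbytes, int nrounds, bool do_simd)
{
	if (!do_simd) {
		/* portable C fallback from lib/crypto - never touches the FPU */
		chacha_crypt_generic(state, dst, src, nbytes, nrounds);
		return;
	}

	/*
	 * FPU sections are now cheap enough that one begin/end pair per
	 * walk step replaces the old "yield every 4 KiB" bookkeeping.
	 */
	kernel_fpu_begin();
	chacha_dosimd(state, dst, src, nbytes, nrounds);	/* SSSE3/AVX glue */
	kernel_fpu_end();
}

The point of deciding do_simd once per request, as the patch does with
(walk->total > CHACHA_BLOCK_SIZE) && crypto_simd_usable(), is that
single-block requests and contexts where SIMD is not usable go straight
to the generic library code without entering an FPU section at all.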