On Mon, Oct 07, 2019 at 06:45:43PM +0200, Ard Biesheuvel wrote:
> In preparation of extending the x86 ChaCha driver to also expose the ChaCha
> library interface, drop the dependency on the chacha_generic crypto driver
> as a non-SIMD fallback, and depend on the generic ChaCha library directly.
> This way, we only pull in the code we actually need, without registering
> a set of ChaCha skciphers that we will never use.
>
> Since turning the FPU on and off is cheap these days, simplify the SIMD
> routine by dropping the per-page yield, which makes for a cleaner switch
> to the library API as well.
>
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@xxxxxxxxxx>
> ---
>  arch/x86/crypto/chacha_glue.c | 77 ++++++++++----------
>  crypto/Kconfig                |  2 +-
>  2 files changed, 40 insertions(+), 39 deletions(-)
>
> diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
> index bc62daa8dafd..3a1a11a4326d 100644
> --- a/arch/x86/crypto/chacha_glue.c
> +++ b/arch/x86/crypto/chacha_glue.c
> @@ -127,32 +127,32 @@ static int chacha_simd_stream_xor(struct skcipher_walk *walk,
>  				  const struct chacha_ctx *ctx, const u8 *iv)
>  {
>  	u32 *state, state_buf[16 + 2] __aligned(8);
> -	int next_yield = 4096; /* bytes until next FPU yield */
> +	bool do_simd;
>  	int err = 0;
>
>  	BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
>  	state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
>
> -	crypto_chacha_init(state, ctx, iv);
> +	chacha_init_generic(state, ctx->key, iv);
>
> +	do_simd = (walk->total > CHACHA_BLOCK_SIZE) && crypto_simd_usable();
>  	while (walk->nbytes > 0) {
>  		unsigned int nbytes = walk->nbytes;
>
> -		if (nbytes < walk->total) {
> +		if (nbytes < walk->total)
>  			nbytes = round_down(nbytes, walk->stride);
> -			next_yield -= nbytes;
> -		}
> -
> -		chacha_dosimd(state, walk->dst.virt.addr, walk->src.virt.addr,
> -			      nbytes, ctx->nrounds);
>
> -		if (next_yield <= 0) {
> -			/* temporarily allow preemption */
> -			kernel_fpu_end();
> +		if (!do_simd) {
> +			chacha_crypt_generic(state, walk->dst.virt.addr,
> +					     walk->src.virt.addr, nbytes,
> +					     ctx->nrounds);
> +		} else {
>  			kernel_fpu_begin();
> -			next_yield = 4096;
> +			chacha_dosimd(state, walk->dst.virt.addr,
> +				      walk->src.virt.addr, nbytes,
> +				      ctx->nrounds);
> +			kernel_fpu_end();

Since the kernel_fpu_begin() and kernel_fpu_end() were moved here, it's
now possible to simplify the code by moving the call to
skcipher_walk_virt() into chacha_simd_stream_xor() rather than making
the caller do it.

I.e., see what the code was like prior to the following commit:

	commit f9c9bdb5131eee60dc3b92e5126d4c0e291703e2
	Author: Eric Biggers <ebiggers@xxxxxxxxxx>
	Date:   Sat Dec 15 12:40:17 2018 -0800

	    crypto: x86/chacha - avoid sleeping under kernel_fpu_begin()
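Roughly like this (an untested sketch against chacha_glue.c, just to
illustrate; it reuses the names from this patch, and it assumes the
'atomic' flag to skcipher_walk_virt() can go back to false now that
nothing sleeps between kernel_fpu_begin() and kernel_fpu_end()):

static int chacha_simd_stream_xor(struct skcipher_request *req,
				  const struct chacha_ctx *ctx, const u8 *iv)
{
	u32 *state, state_buf[16 + 2] __aligned(8);
	struct skcipher_walk walk;
	bool do_simd;
	int err;

	/* Start the walk here instead of in each caller. */
	err = skcipher_walk_virt(&walk, req, false);
	if (err)
		return err;

	BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
	state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);

	chacha_init_generic(state, ctx->key, iv);

	do_simd = (walk.total > CHACHA_BLOCK_SIZE) && crypto_simd_usable();
	while (walk.nbytes > 0) {
		unsigned int nbytes = walk.nbytes;

		if (nbytes < walk.total)
			nbytes = round_down(nbytes, walk.stride);

		if (!do_simd) {
			chacha_crypt_generic(state, walk.dst.virt.addr,
					     walk.src.virt.addr, nbytes,
					     ctx->nrounds);
		} else {
			kernel_fpu_begin();
			chacha_dosimd(state, walk.dst.virt.addr,
				      walk.src.virt.addr, nbytes,
				      ctx->nrounds);
			kernel_fpu_end();
		}
		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
	}

	return err;
}

chacha_simd() then shrinks to a plain tail call:

static int chacha_simd(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);

	return chacha_simd_stream_xor(req, ctx, req->iv);
}

and xchacha_simd() would pass its request down the same way.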
>  		}
> -
>  		err = skcipher_walk_done(walk, walk->nbytes - nbytes);
>  	}
>
> @@ -164,19 +164,9 @@ static int chacha_simd(struct skcipher_request *req)
>  	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
>  	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
>  	struct skcipher_walk walk;
> -	int err;
>
> -	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
> -		return crypto_chacha_crypt(req);
> -
> -	err = skcipher_walk_virt(&walk, req, true);
> -	if (err)
> -		return err;
> -
> -	kernel_fpu_begin();
> -	err = chacha_simd_stream_xor(&walk, ctx, req->iv);
> -	kernel_fpu_end();
> -	return err;
> +	return skcipher_walk_virt(&walk, req, true) ?:
> +		chacha_simd_stream_xor(&walk, ctx, req->iv);
>  }
>
>  static int xchacha_simd(struct skcipher_request *req)
> @@ -189,31 +179,42 @@ static int xchacha_simd(struct skcipher_request *req)
>  	u8 real_iv[16];
>  	int err;
>
> -	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
> -		return crypto_xchacha_crypt(req);
> -
>  	err = skcipher_walk_virt(&walk, req, true);
>  	if (err)
>  		return err;
>
>  	BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
>  	state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
> -	crypto_chacha_init(state, ctx, req->iv);
> -
> -	kernel_fpu_begin();
> -
> -	hchacha_block_ssse3(state, subctx.key, ctx->nrounds);
> +	chacha_init_generic(state, ctx->key, req->iv);
> +
> +	if (req->cryptlen > CHACHA_BLOCK_SIZE && crypto_simd_usable()) {
> +		kernel_fpu_begin();
> +		hchacha_block_ssse3(state, subctx.key, ctx->nrounds);
> +		kernel_fpu_end();
> +	} else {
> +		hchacha_block_generic(state, subctx.key, ctx->nrounds);
> +	}
>  	subctx.nrounds = ctx->nrounds;
>
>  	memcpy(&real_iv[0], req->iv + 24, 8);
>  	memcpy(&real_iv[8], req->iv + 16, 8);
>  	err = chacha_simd_stream_xor(&walk, &subctx, real_iv);
>
> -	kernel_fpu_end();
> -
>  	return err;

Can use 'return chacha_simd_stream_xor(...)' here.
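I.e., the tail of xchacha_simd() becomes (sketch, with the function
signature as posted in this patch):

	memcpy(&real_iv[0], req->iv + 24, 8);
	memcpy(&real_iv[8], req->iv + 16, 8);

	return chacha_simd_stream_xor(&walk, &subctx, real_iv);

- Eric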