Wrap each assembly language function that uses FPU context in its own
kernel_fpu_begin()/kernel_fpu_end() pair, rather than wrapping the
entire set of calls (init, ad, crypt, final) in one pair.

Limit the processing of bulk data to bytes_per_fpu (a 4 KiB
compile-time constant), so multiple blocks are still processed within
one FPU context but no single context runs long enough to cause
scheduler or RCU stalls (associated data is not limited).

Allow the skcipher_walk functions to sleep again, since they are no
longer called inside FPU context.

Motivation: calling crypto_aead_encrypt() with a single scatter-gather
list entry pointing to a 1 MiB plaintext buffer caused the
aesni_encrypt function to receive a length of 1048576 bytes and
consume 306348 cycles within FPU context to process that data.

Fixes: 1d373d4e8e15 ("crypto: x86 - Add optimized AEGIS implementations")
Fixes: ba6771c0a0bc ("crypto: x86/aegis - fix handling chunked inputs and MAY_SLEEP")
Signed-off-by: Robert Elliott <elliott@xxxxxxx>
---
 arch/x86/crypto/aegis128-aesni-glue.c | 39 ++++++++++++++++++++-------
 1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c
index 4623189000d8..6e96bdda2811 100644
--- a/arch/x86/crypto/aegis128-aesni-glue.c
+++ b/arch/x86/crypto/aegis128-aesni-glue.c
@@ -23,6 +23,9 @@
 #define AEGIS128_MIN_AUTH_SIZE 8
 #define AEGIS128_MAX_AUTH_SIZE 16
 
+/* avoid kernel_fpu_begin/end scheduler/rcu stalls */
+static const unsigned int bytes_per_fpu = 4 * 1024;
+
 asmlinkage void crypto_aegis128_aesni_init(void *state, void *key, void *iv);
 
 asmlinkage void crypto_aegis128_aesni_ad(
@@ -85,15 +88,19 @@ static void crypto_aegis128_aesni_process_ad(
 			if (pos > 0) {
 				unsigned int fill = AEGIS128_BLOCK_SIZE - pos;
 				memcpy(buf.bytes + pos, src, fill);
-				crypto_aegis128_aesni_ad(state,
+				kernel_fpu_begin();
+				crypto_aegis128_aesni_ad(state->blocks,
 							 AEGIS128_BLOCK_SIZE,
 							 buf.bytes);
+				kernel_fpu_end();
 				pos = 0;
 				left -= fill;
 				src += fill;
 			}
 
-			crypto_aegis128_aesni_ad(state, left, src);
+			kernel_fpu_begin();
+			crypto_aegis128_aesni_ad(state->blocks, left, src);
+			kernel_fpu_end();
 
 			src += left & ~(AEGIS128_BLOCK_SIZE - 1);
 			left &= AEGIS128_BLOCK_SIZE - 1;
@@ -110,7 +117,9 @@ static void crypto_aegis128_aesni_process_ad(
 	}
 
 	if (pos > 0) {
 		memset(buf.bytes + pos, 0, AEGIS128_BLOCK_SIZE - pos);
-		crypto_aegis128_aesni_ad(state, AEGIS128_BLOCK_SIZE, buf.bytes);
+		kernel_fpu_begin();
+		crypto_aegis128_aesni_ad(state->blocks, AEGIS128_BLOCK_SIZE, buf.bytes);
+		kernel_fpu_end();
 	}
 }
@@ -119,15 +128,23 @@ static void crypto_aegis128_aesni_process_crypt(
 		const struct aegis_crypt_ops *ops)
 {
 	while (walk->nbytes >= AEGIS128_BLOCK_SIZE) {
-		ops->crypt_blocks(state,
-				  round_down(walk->nbytes, AEGIS128_BLOCK_SIZE),
+		unsigned int chunk = min(walk->nbytes, bytes_per_fpu);
+
+		chunk = round_down(chunk, AEGIS128_BLOCK_SIZE);
+
+		kernel_fpu_begin();
+		ops->crypt_blocks(state->blocks, chunk,
 				  walk->src.virt.addr, walk->dst.virt.addr);
-		skcipher_walk_done(walk, walk->nbytes % AEGIS128_BLOCK_SIZE);
+		kernel_fpu_end();
+
+		skcipher_walk_done(walk, walk->nbytes - chunk);
 	}
 
 	if (walk->nbytes) {
-		ops->crypt_tail(state, walk->nbytes, walk->src.virt.addr,
+		kernel_fpu_begin();
+		ops->crypt_tail(state->blocks, walk->nbytes, walk->src.virt.addr,
 				walk->dst.virt.addr);
+		kernel_fpu_end();
 		skcipher_walk_done(walk, 0);
 	}
 }
@@ -172,15 +189,17 @@ static void crypto_aegis128_aesni_crypt(struct aead_request *req,
 	struct skcipher_walk walk;
 	struct aegis_state state;
 
-	ops->skcipher_walk_init(&walk, req, true);
+	ops->skcipher_walk_init(&walk, req, false);
 
 	kernel_fpu_begin();
+	crypto_aegis128_aesni_init(&state.blocks, ctx->key.bytes, req->iv);
+	kernel_fpu_end();
 
-	crypto_aegis128_aesni_init(&state, ctx->key.bytes, req->iv);
 	crypto_aegis128_aesni_process_ad(&state, req->src, req->assoclen);
 	crypto_aegis128_aesni_process_crypt(&state, &walk, ops);
-	crypto_aegis128_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
 
+	kernel_fpu_begin();
+	crypto_aegis128_aesni_final(&state.blocks, tag_xor, req->assoclen, cryptlen);
 	kernel_fpu_end();
 }
-- 
2.38.1
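
P.S. For anyone who wants to poke at the chunking arithmetic outside
the kernel, below is a minimal userspace sketch of the loop this patch
adds to crypto_aegis128_aesni_process_crypt(). It is not kernel code:
fpu_begin(), fpu_end(), crypt_blocks(), and crypt_tail() are stand-ins
for kernel_fpu_begin(), kernel_fpu_end(), and the assembly routines,
and min_uint()/round_down_uint() stand in for the kernel's min() and
round_down() helpers.

  /* Userspace sketch (not kernel code): bounded FPU sections per chunk. */
  #include <stdio.h>

  #define BLOCK_SIZE 16			/* stands in for AEGIS128_BLOCK_SIZE */

  static const unsigned int bytes_per_fpu = 4 * 1024;

  static unsigned int min_uint(unsigned int a, unsigned int b)
  {
  	return a < b ? a : b;
  }

  static unsigned int round_down_uint(unsigned int n, unsigned int align)
  {
  	return n - (n % align);
  }

  /* stand-ins for kernel_fpu_begin()/kernel_fpu_end() and the asm routines */
  static void fpu_begin(void) { }
  static void fpu_end(void) { }

  static void crypt_blocks(unsigned int len)
  {
  	printf("bulk: %u bytes in one FPU section\n", len);
  }

  static void crypt_tail(unsigned int len)
  {
  	printf("tail: %u bytes in one FPU section\n", len);
  }

  int main(void)
  {
  	unsigned int nbytes = 1024 * 1024 + 5;	/* 1 MiB plus a partial block */

  	while (nbytes >= BLOCK_SIZE) {
  		unsigned int chunk = min_uint(nbytes, bytes_per_fpu);

  		chunk = round_down_uint(chunk, BLOCK_SIZE);

  		fpu_begin();
  		crypt_blocks(chunk);
  		fpu_end();

  		/* the kernel advances the walk via skcipher_walk_done() */
  		nbytes -= chunk;
  	}

  	if (nbytes) {
  		fpu_begin();
  		crypt_tail(nbytes);
  		fpu_end();
  	}
  	return 0;
  }

With a 1 MiB buffer this runs 256 bounded FPU sections of 4096 bytes
each (plus one tail section) instead of a single 1048576-byte section,
which is the preemption-latency improvement described above.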