In order to reduce the number of calls from the RFC7539 template into the Poly1305 driver, implement the new internal .update_from_sg method that allows the driver to amortize the cost of FPU preserve/restore sequences over a larger chunk of input. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@xxxxxxxxxx> --- arch/x86/crypto/poly1305_glue.c | 54 ++++++++++++++++---- 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index 4a1c05dce950..f2afaa8e23c2 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -115,18 +115,11 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx, return srclen; } -static int poly1305_simd_update(struct shash_desc *desc, - const u8 *src, unsigned int srclen) +static void poly1305_simd_do_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) { - struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); unsigned int bytes; - /* kernel_fpu_begin/end is costly, use fallback for small updates */ - if (srclen <= 288 || !crypto_simd_usable()) - return crypto_poly1305_update(desc, src, srclen); - - kernel_fpu_begin(); - if (unlikely(dctx->buflen)) { bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen); memcpy(dctx->buf + dctx->buflen, src, bytes); @@ -147,12 +140,50 @@ static int poly1305_simd_update(struct shash_desc *desc, srclen = bytes; } - kernel_fpu_end(); - if (unlikely(srclen)) { dctx->buflen = srclen; memcpy(dctx->buf, src, srclen); } +} + +static int poly1305_simd_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + /* kernel_fpu_begin/end is costly, use fallback for small updates */ + if (srclen <= 288 || !crypto_simd_usable()) + return crypto_poly1305_update(desc, src, srclen); + + kernel_fpu_begin(); + poly1305_simd_do_update(desc, src, srclen); + kernel_fpu_end(); + + return 0; +} + +static int poly1305_simd_update_from_sg(struct shash_desc *desc, + struct 
scatterlist *sg, + unsigned int srclen, + int flags) +{ + bool do_simd = crypto_simd_usable() && srclen > 288; + struct crypto_hash_walk walk; + int nbytes; + + if (do_simd) { + kernel_fpu_begin(); + flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + } + + for (nbytes = crypto_shash_walk_sg(desc, sg, srclen, &walk, flags); + nbytes > 0; + nbytes = crypto_hash_walk_done(&walk, 0)) { + if (do_simd) + poly1305_simd_do_update(desc, walk.data, nbytes); + else + crypto_poly1305_update(desc, walk.data, nbytes); + } + if (do_simd) + kernel_fpu_end(); return 0; } @@ -161,6 +192,7 @@ static struct shash_alg alg = { .digestsize = POLY1305_DIGEST_SIZE, .init = poly1305_simd_init, .update = poly1305_simd_update, + .update_from_sg = poly1305_simd_update_from_sg, .final = crypto_poly1305_final, .descsize = sizeof(struct poly1305_simd_desc_ctx), .base = { -- 2.20.1