Align the x86 code with the generic XTS template, which now supports
ciphertext stealing as described by the IEEE XTS-AES spec P1619.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@xxxxxxxxxx>
---
(A compilable userspace sketch of the ciphertext stealing flow is
appended below the patch, for reference.)

 arch/x86/crypto/aesni-intel_glue.c         |  1 +
 arch/x86/crypto/camellia_aesni_avx2_glue.c |  1 +
 arch/x86/crypto/camellia_aesni_avx_glue.c  |  1 +
 arch/x86/crypto/cast6_avx_glue.c           |  1 +
 arch/x86/crypto/glue_helper.c              | 70 +++++++++++++++++++++-
 arch/x86/crypto/serpent_avx2_glue.c        |  1 +
 arch/x86/crypto/serpent_avx_glue.c         |  1 +
 arch/x86/crypto/twofish_avx_glue.c         |  1 +
 arch/x86/include/asm/crypto/glue_helper.h  |  1 +
 9 files changed, 76 insertions(+), 2 deletions(-)

diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index ef165d8cf443..0ea1517faf09 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -591,6 +591,7 @@ static const struct common_glue_ctx aesni_enc_xts = {
 static const struct common_glue_ctx aesni_dec_xts = {
 	.num_funcs = 2,
 	.fpu_blocks_limit = 1,
+	.xts_decrypt = 1,
 
 	.funcs = { {
 		.num_blocks = 8,
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index abf298c272dc..7854378e6d0c 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -129,6 +129,7 @@ static const struct common_glue_ctx camellia_dec_cbc = {
 static const struct common_glue_ctx camellia_dec_xts = {
 	.num_funcs = 3,
 	.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+	.xts_decrypt = 1,
 
 	.funcs = { {
 		.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index 0c22d84750a3..58e8f08819b6 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -136,6 +136,7 @@ static const struct common_glue_ctx camellia_dec_cbc = {
 static const struct common_glue_ctx camellia_dec_xts = {
 	.num_funcs = 2,
 	.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+	.xts_decrypt = 1,
 
 	.funcs = { {
 		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index 645f8f16815c..dfdbdeff5a2b 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -132,6 +132,7 @@ static const struct common_glue_ctx cast6_dec_cbc = {
 static const struct common_glue_ctx cast6_dec_xts = {
 	.num_funcs = 2,
 	.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
+	.xts_decrypt = 1,
 
 	.funcs = { {
 		.num_blocks = CAST6_PARALLEL_BLOCKS,
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index 901551445387..693541d00c0a 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -14,6 +14,7 @@
 #include <crypto/b128ops.h>
 #include <crypto/gf128mul.h>
 #include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <crypto/xts.h>
 #include <asm/crypto/glue_helper.h>
 
@@ -261,15 +262,34 @@ int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
 			common_glue_func_t tweak_fn, void *tweak_ctx,
 			void *crypt_ctx)
 {
+	const bool cts = (req->cryptlen % XTS_BLOCK_SIZE);
 	const unsigned int bsize = 128 / 8;
+	struct skcipher_request subreq;
 	struct skcipher_walk walk;
 	bool fpu_enabled = false;
-	unsigned int nbytes;
+	unsigned int nbytes, tail;
 	int err;
 
+	if (req->cryptlen < XTS_BLOCK_SIZE)
+		return -EINVAL;
+
+	if (unlikely(cts)) {
+		struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+
+		tail = req->cryptlen % XTS_BLOCK_SIZE + XTS_BLOCK_SIZE;
+
+		skcipher_request_set_tfm(&subreq, tfm);
+		skcipher_request_set_callback(&subreq,
+					      crypto_skcipher_get_flags(tfm),
+					      NULL, NULL);
+		skcipher_request_set_crypt(&subreq, req->src, req->dst,
+					   req->cryptlen - tail, req->iv);
+		req = &subreq;
+	}
+
 	err = skcipher_walk_virt(&walk, req, false);
 	nbytes = walk.nbytes;
-	if (!nbytes)
+	if (err)
 		return err;
 
 	/* set minimum length to bsize, for tweak_fn */
@@ -287,6 +307,52 @@ int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
 		nbytes = walk.nbytes;
 	}
 
+	if (unlikely(cts)) {
+		struct scatterlist *src, *dst;
+		struct scatterlist s[2], d[2];
+		le128 final_tweak;
+		u8 *next_tweak;
+		le128 b[2];
+
+		dst = src = scatterwalk_ffwd(s, req->src, req->cryptlen);
+		if (req->dst != req->src)
+			dst = scatterwalk_ffwd(d, req->dst, req->cryptlen);
+
+		if (gctx->xts_decrypt) {
+			final_tweak = *(le128 *)req->iv;
+			next_tweak = memcpy(b, req->iv, XTS_BLOCK_SIZE);
+			gf128mul_x_ble(b, b);
+		} else {
+			next_tweak = req->iv;
+		}
+
+		skcipher_request_set_crypt(&subreq, src, dst, XTS_BLOCK_SIZE,
+					   next_tweak);
+
+		err = skcipher_walk_virt(&walk, req, false) ?:
+		      skcipher_walk_done(&walk,
+				__glue_xts_req_128bit(gctx, crypt_ctx, &walk));
+		if (err)
+			goto out;
+
+		scatterwalk_map_and_copy(b, dst, 0, XTS_BLOCK_SIZE, 0);
+		memcpy(b + 1, b, tail - XTS_BLOCK_SIZE);
+		scatterwalk_map_and_copy(b, src, XTS_BLOCK_SIZE,
+					 tail - XTS_BLOCK_SIZE, 0);
+		scatterwalk_map_and_copy(b, dst, 0, tail, 1);
+
+		skcipher_request_set_crypt(&subreq, dst, dst, XTS_BLOCK_SIZE,
+					   gctx->xts_decrypt ? (u8 *)&final_tweak
+							     : req->iv);
+
+		err = skcipher_walk_virt(&walk, req, false) ?:
+		      skcipher_walk_done(&walk,
+				__glue_xts_req_128bit(gctx, crypt_ctx, &walk));
+		if (err)
+			goto out;
+	}
+
+out:
 	glue_fpu_end(fpu_enabled);
 
 	return err;
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index b871728e0b2f..b7b82bf69a67 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -121,6 +121,7 @@ static const struct common_glue_ctx serpent_dec_cbc = {
 static const struct common_glue_ctx serpent_dec_xts = {
 	.num_funcs = 3,
 	.fpu_blocks_limit = 8,
+	.xts_decrypt = 1,
 
 	.funcs = { {
 		.num_blocks = 16,
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index 4a9a9f2ee1d8..c46d722ecc12 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -164,6 +164,7 @@ static const struct common_glue_ctx serpent_dec_cbc = {
 static const struct common_glue_ctx serpent_dec_xts = {
 	.num_funcs = 2,
 	.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
+	.xts_decrypt = 1,
 
 	.funcs = { {
 		.num_blocks = SERPENT_PARALLEL_BLOCKS,
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index 0dbf8e8b09d7..220de42bc3ab 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -167,6 +167,7 @@ static const struct common_glue_ctx twofish_dec_cbc = {
 static const struct common_glue_ctx twofish_dec_xts = {
 	.num_funcs = 2,
 	.fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
+	.xts_decrypt = 1,
 
 	.funcs = { {
 		.num_blocks = TWOFISH_PARALLEL_BLOCKS,
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
index d1818634ae7e..31e038759a54 100644
--- a/arch/x86/include/asm/crypto/glue_helper.h
+++ b/arch/x86/include/asm/crypto/glue_helper.h
@@ -36,6 +36,7 @@ struct common_glue_func_entry {
 struct common_glue_ctx {
 	unsigned int num_funcs;
 	int fpu_blocks_limit; /* -1 means fpu not needed at all */
+	int xts_decrypt; /* whether this implements XTS decryption */
 
 	/*
 	 * First funcs entry must have largest num_blocks and last funcs entry
-- 
2.17.1
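
For reference, the following is a minimal, compilable userspace sketch
of the IEEE P1619 ciphertext stealing flow that the glue_helper changes
above implement. It is NOT kernel code: toy_ecb() is a keyless
placeholder standing in for the real block cipher (AES in XTS-AES), and
the names gf_mulx(), xts_block(), xts_crypt() and "stolen" are made up
for illustration. Note how decryption consumes the last two tweaks in
reverse order -- the same reordering the final_tweak/next_tweak handling
in glue_xts_req_128bit() arranges.

/*
 * Illustrative sketch only -- not kernel code.  toy_ecb() is a keyless
 * placeholder for a real block cipher so the example runs on its own.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BLK 16

/* Multiply the tweak by x in GF(2^128), little-endian block convention. */
static void gf_mulx(uint8_t t[BLK])
{
	int carry = t[BLK - 1] >> 7;
	int i;

	for (i = BLK - 1; i > 0; i--)
		t[i] = (uint8_t)((t[i] << 1) | (t[i - 1] >> 7));
	t[0] = (uint8_t)((t[0] << 1) ^ (carry ? 0x87 : 0x00));
}

/* Placeholder "block cipher": an invertible byte-wise map, NOT secure. */
static void toy_ecb(uint8_t blk[BLK], int enc)
{
	int i;

	for (i = 0; i < BLK; i++)
		blk[i] = (uint8_t)(enc ? blk[i] + 1 : blk[i] - 1);
}

/* One XTS block: dst = E(src ^ T) ^ T (decrypt when enc == 0). */
static void xts_block(uint8_t *dst, const uint8_t *src,
		      const uint8_t tweak[BLK], int enc)
{
	uint8_t b[BLK];
	int i;

	for (i = 0; i < BLK; i++)
		b[i] = src[i] ^ tweak[i];
	toy_ecb(b, enc);
	for (i = 0; i < BLK; i++)
		dst[i] = b[i] ^ tweak[i];
}

/* XTS with ciphertext stealing: any len >= 16 bytes, in-place safe. */
static void xts_crypt(uint8_t *dst, const uint8_t *src, size_t len,
		      const uint8_t iv[BLK], int enc)
{
	uint8_t t[BLK], t_prev[BLK], b[BLK], stolen[BLK];
	size_t tail, head, i;

	if (len < BLK)
		return;		/* XTS needs at least one full block */

	tail = len % BLK;
	head = tail ? len - tail - BLK : len;	/* bytes done normally */

	memcpy(t, iv, BLK);
	for (i = 0; i < head; i += BLK) {
		xts_block(dst + i, src + i, t, enc);
		gf_mulx(t);
	}
	if (!tail)
		return;

	/*
	 * Ciphertext stealing for the final 16 + tail bytes.  Encryption
	 * uses tweaks T(m-1) then T(m); decryption uses T(m) then T(m-1),
	 * which is why the patch keeps final_tweak around for decryption.
	 */
	memcpy(t_prev, t, BLK);
	gf_mulx(t);				/* t is now the "next" tweak */
	memcpy(stolen, src + i + BLK, tail);	/* save partial block first */

	xts_block(b, src + i, enc ? t_prev : t, enc);
	memcpy(dst + i + BLK, b, tail);		/* head of b becomes the tail */
	memcpy(b, stolen, tail);		/* steal the partial block in */
	xts_block(dst + i, b, enc ? t : t_prev, enc);
}

int main(void)
{
	uint8_t iv[BLK] = { 1 }, in[23], out[23], back[23];
	size_t i;

	for (i = 0; i < sizeof(in); i++)
		in[i] = (uint8_t)i;

	xts_crypt(out, in, sizeof(in), iv, 1);
	xts_crypt(back, out, sizeof(out), iv, 0);
	printf("round trip: %s\n",
	       memcmp(in, back, sizeof(in)) ? "FAILED" : "OK");
	return 0;
}

Built with cc -std=c99, the round trip prints "OK"; substituting a real
AES core for toy_ecb() turns the sketch into actual XTS-AES.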