On Fri, 16 Aug 2019 at 13:10, Ard Biesheuvel <ard.biesheuvel@xxxxxxxxxx> wrote: > > Align the x86 code with the generic XTS template, which now supports > ciphertext stealing as described by the IEEE XTS-AES spec P1619. > > Signed-off-by: Ard Biesheuvel <ard.biesheuvel@xxxxxxxxxx> Oops, $SUBJECT should be x86/xts rather than aes/xts > --- > arch/x86/crypto/aesni-intel_glue.c | 1 + > arch/x86/crypto/camellia_aesni_avx2_glue.c | 1 + > arch/x86/crypto/camellia_aesni_avx_glue.c | 1 + > arch/x86/crypto/cast6_avx_glue.c | 1 + > arch/x86/crypto/glue_helper.c | 70 +++++++++++++++++++++- > arch/x86/crypto/serpent_avx2_glue.c | 1 + > arch/x86/crypto/serpent_avx_glue.c | 1 + > arch/x86/crypto/twofish_avx_glue.c | 1 + > arch/x86/include/asm/crypto/glue_helper.h | 1 + > 9 files changed, 76 insertions(+), 2 deletions(-) > > diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c > index ef165d8cf443..0ea1517faf09 100644 > --- a/arch/x86/crypto/aesni-intel_glue.c > +++ b/arch/x86/crypto/aesni-intel_glue.c > @@ -591,6 +591,7 @@ static const struct common_glue_ctx aesni_enc_xts = { > static const struct common_glue_ctx aesni_dec_xts = { > .num_funcs = 2, > .fpu_blocks_limit = 1, > + .xts_decrypt = 1, > > .funcs = { { > .num_blocks = 8, > diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c > index abf298c272dc..7854378e6d0c 100644 > --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c > +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c > @@ -129,6 +129,7 @@ static const struct common_glue_ctx camellia_dec_cbc = { > static const struct common_glue_ctx camellia_dec_xts = { > .num_funcs = 3, > .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, > + .xts_decrypt = 1, > > .funcs = { { > .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, > diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c > index 0c22d84750a3..58e8f08819b6 100644 > --- a/arch/x86/crypto/camellia_aesni_avx_glue.c > +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c > @@ -136,6 +136,7 @@ static const struct common_glue_ctx camellia_dec_cbc = { > static const struct common_glue_ctx camellia_dec_xts = { > .num_funcs = 2, > .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS, > + .xts_decrypt = 1, > > .funcs = { { > .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS, > diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c > index 645f8f16815c..dfdbdeff5a2b 100644 > --- a/arch/x86/crypto/cast6_avx_glue.c > +++ b/arch/x86/crypto/cast6_avx_glue.c > @@ -132,6 +132,7 @@ static const struct common_glue_ctx cast6_dec_cbc = { > static const struct common_glue_ctx cast6_dec_xts = { > .num_funcs = 2, > .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS, > + .xts_decrypt = 1, > > .funcs = { { > .num_blocks = CAST6_PARALLEL_BLOCKS, > diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c > index 901551445387..693541d00c0a 100644 > --- a/arch/x86/crypto/glue_helper.c > +++ b/arch/x86/crypto/glue_helper.c > @@ -14,6 +14,7 @@ > #include <crypto/b128ops.h> > #include <crypto/gf128mul.h> > #include <crypto/internal/skcipher.h> > +#include <crypto/scatterwalk.h> > #include <crypto/xts.h> > #include <asm/crypto/glue_helper.h> > > @@ -261,15 +262,34 @@ int glue_xts_req_128bit(const struct common_glue_ctx *gctx, > common_glue_func_t tweak_fn, void *tweak_ctx, > void *crypt_ctx) > { > + const bool cts = (req->cryptlen % XTS_BLOCK_SIZE); > const unsigned int bsize = 128 / 8; > + struct skcipher_request subreq; > struct skcipher_walk walk; > bool fpu_enabled = false; > - unsigned int nbytes; > + unsigned int nbytes, tail; > int err; > > + if (req->cryptlen < XTS_BLOCK_SIZE) > + return -EINVAL; > + > + if (unlikely(cts)) { > + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); > + > + tail = req->cryptlen % XTS_BLOCK_SIZE + XTS_BLOCK_SIZE; > + > + skcipher_request_set_tfm(&subreq, tfm); > + skcipher_request_set_callback(&subreq, > + crypto_skcipher_get_flags(tfm), > + NULL, NULL); > + skcipher_request_set_crypt(&subreq, req->src, req->dst, > + req->cryptlen - tail, req->iv); > + req = &subreq; > + } > + > err = skcipher_walk_virt(&walk, req, false); > nbytes = walk.nbytes; > - if (!nbytes) > + if (err) > return err; > > /* set minimum length to bsize, for tweak_fn */ > @@ -287,6 +307,52 @@ int glue_xts_req_128bit(const struct common_glue_ctx *gctx, > nbytes = walk.nbytes; > } > > + if (unlikely(cts)) { > + struct scatterlist *src, *dst; > + struct scatterlist s[2], d[2]; > + le128 final_tweak; > + u8 *next_tweak; > + le128 b[2]; > + > + dst = src = scatterwalk_ffwd(s, req->src, req->cryptlen); > + if (req->dst != req->src) > + dst = scatterwalk_ffwd(d, req->dst, req->cryptlen); > + > + if (gctx->xts_decrypt) { > + final_tweak = *(le128 *)req->iv; > + next_tweak = memcpy(b, req->iv, XTS_BLOCK_SIZE); > + gf128mul_x_ble(b, b); > + } else { > + next_tweak = req->iv; > + } > + > + skcipher_request_set_crypt(&subreq, src, dst, XTS_BLOCK_SIZE, > + next_tweak); > + > + err = skcipher_walk_virt(&walk, req, false) ?: > + skcipher_walk_done(&walk, > + __glue_xts_req_128bit(gctx, crypt_ctx, &walk)); > + if (err) > + goto out; > + > + scatterwalk_map_and_copy(b, dst, 0, XTS_BLOCK_SIZE, 0); > + memcpy(b + 1, b, tail - XTS_BLOCK_SIZE); > + scatterwalk_map_and_copy(b, src, XTS_BLOCK_SIZE, > + tail - XTS_BLOCK_SIZE, 0); > + scatterwalk_map_and_copy(b, dst, 0, tail, 1); > + > + skcipher_request_set_crypt(&subreq, dst, dst, XTS_BLOCK_SIZE, > + gctx->xts_decrypt ? (u8 *)&final_tweak > + : req->iv); > + > + err = skcipher_walk_virt(&walk, req, false) ?: > + skcipher_walk_done(&walk, > + __glue_xts_req_128bit(gctx, crypt_ctx, &walk)); > + if (err) > + goto out; > + } > + > +out: > glue_fpu_end(fpu_enabled); > > return err; > diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c > index b871728e0b2f..b7b82bf69a67 100644 > --- a/arch/x86/crypto/serpent_avx2_glue.c > +++ b/arch/x86/crypto/serpent_avx2_glue.c > @@ -121,6 +121,7 @@ static const struct common_glue_ctx serpent_dec_cbc = { > static const struct common_glue_ctx serpent_dec_xts = { > .num_funcs = 3, > .fpu_blocks_limit = 8, > + .xts_decrypt = 1, > > .funcs = { { > .num_blocks = 16, > diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c > index 4a9a9f2ee1d8..c46d722ecc12 100644 > --- a/arch/x86/crypto/serpent_avx_glue.c > +++ b/arch/x86/crypto/serpent_avx_glue.c > @@ -164,6 +164,7 @@ static const struct common_glue_ctx serpent_dec_cbc = { > static const struct common_glue_ctx serpent_dec_xts = { > .num_funcs = 2, > .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS, > + .xts_decrypt = 1, > > .funcs = { { > .num_blocks = SERPENT_PARALLEL_BLOCKS, > diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c > index 0dbf8e8b09d7..220de42bc3ab 100644 > --- a/arch/x86/crypto/twofish_avx_glue.c > +++ b/arch/x86/crypto/twofish_avx_glue.c > @@ -167,6 +167,7 @@ static const struct common_glue_ctx twofish_dec_cbc = { > static const struct common_glue_ctx twofish_dec_xts = { > .num_funcs = 2, > .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS, > + .xts_decrypt = 1, > > .funcs = { { > .num_blocks = TWOFISH_PARALLEL_BLOCKS, > diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h > index d1818634ae7e..31e038759a54 100644 > --- a/arch/x86/include/asm/crypto/glue_helper.h > +++ b/arch/x86/include/asm/crypto/glue_helper.h > @@ -36,6 +36,7 @@ struct common_glue_func_entry { > struct common_glue_ctx { > unsigned int num_funcs; > int fpu_blocks_limit; /* -1 means fpu not needed at all */ > + int xts_decrypt; /* whether this implements XTS decryption */ > > /* > * First funcs entry must have largest num_blocks and last funcs entry > -- > 2.17.1 >