Hello Pascal, Thanks for looking into this. On Thu, 8 Aug 2019 at 10:20, Pascal van Leeuwen <pascalvanl@xxxxxxxxx> wrote: > > This adds support for Cipher Text Stealing for data blocks that are not an > integer multiple of the cipher block size in size, bringing it fully in > line with the IEEE P1619/D16 standard. > > This has been tested with the AES-XTS test vectors from the IEEE P1619/D16 > specification as well as some additional test vectors supplied to the > linux_crypto mailing list previously. It has also been fuzzed against > Inside Secure AES-XTS hardware which has been actively used in the field > for more than a decade already. > > changes since v1: > - Fixed buffer overflow issue due to subreq not being the last entry in > rctx, this turns out to be a crypto API requirement. Thanks to Milan > Broz <gmazyland@xxxxxxxxx> for finding this and providing the solution. > - Removed some redundant error returning code from the _finish_cts() > functions that currently cannot fail, therefore would always return 0. > - removed rem_bytes computation behind init_crypt() in the encrypt() and > decrypt() functions, no need to compute for lengths < 16 > - Fixed comment style for single line comments > Please put the changelog below the --- > Signed-off-by: Pascal van Leeuwen <pvanleeuwen@xxxxxxxxxxxxxx> > --- > crypto/xts.c | 229 +++++++++++++++++++++++++++++++++++++++++++++++++++++------ > 1 file changed, 209 insertions(+), 20 deletions(-) > > diff --git a/crypto/xts.c b/crypto/xts.c > index 33cf726..17b551d 100644 > --- a/crypto/xts.c > +++ b/crypto/xts.c > @@ -1,7 +1,5 @@ > /* XTS: as defined in IEEE1619/D16 > * http://grouper.ieee.org/groups/1619/email/pdf00086.pdf > - * (sector sizes which are not a multiple of 16 bytes are, > - * however currently unsupported) > * > * Copyright (c) 2007 Rik Snel <rsnel@xxxxxxxxxxxxxxx> > * > @@ -28,6 +26,7 @@ > > struct priv { > struct crypto_skcipher *child; > + struct crypto_cipher *base; Why do you need a separate cipher for the ciphertext stealing? ECB can be invoked with a single block just fine, and it will behave exactly like the bare cipher. > struct crypto_cipher *tweak; > }; > > @@ -37,7 +36,9 @@ struct xts_instance_ctx { > }; > > struct rctx { > - le128 t; > + le128 t, tcur; > + int rem_bytes, is_encrypt; Instead of adding the is_encrypt flag, could we change crypt_done into encrypt_done/decrypt_done? > + /* must be the last, expanded beyond end of struct! */ > struct skcipher_request subreq; This is not sufficient. You have to add a TFM init() function which sets the request size. Please look at the cts code for an example. > }; > > @@ -47,6 +48,7 @@ static int setkey(struct crypto_skcipher *parent, const u8 *key, > struct priv *ctx = crypto_skcipher_ctx(parent); > struct crypto_skcipher *child; > struct crypto_cipher *tweak; > + struct crypto_cipher *base; > int err; > > err = xts_verify_key(parent, key, keylen); > @@ -55,9 +57,11 @@ static int setkey(struct crypto_skcipher *parent, const u8 *key, > > keylen /= 2; > > - /* we need two cipher instances: one to compute the initial 'tweak' > - * by encrypting the IV (usually the 'plain' iv) and the other > - * one to encrypt and decrypt the data */ > + /* we need three cipher instances: one to compute the initial 'tweak' > + * by encrypting the IV (usually the 'plain' iv), one to encrypt and > + * decrypt the data and finally one to encrypt the last block(s) for > + * cipher text stealing > + */ > > /* tweak cipher, uses Key2 i.e. the second half of *key */ > tweak = ctx->tweak; > @@ -79,6 +83,13 @@ static int setkey(struct crypto_skcipher *parent, const u8 *key, > crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) & > CRYPTO_TFM_RES_MASK); > > + /* Also data cipher, using Key1, for applying CTS */ > + base = ctx->base; > + crypto_cipher_clear_flags(base, CRYPTO_TFM_REQ_MASK); > + crypto_cipher_set_flags(base, crypto_skcipher_get_flags(parent) & > + CRYPTO_TFM_REQ_MASK); > + err = crypto_cipher_setkey(base, key, keylen); > + > return err; > } > > @@ -88,13 +99,12 @@ static int setkey(struct crypto_skcipher *parent, const u8 *key, > * mutliple calls to the 'ecb(..)' instance, which usually would be slower than > * just doing the gf128mul_x_ble() calls again. > */ > -static int xor_tweak(struct skcipher_request *req, bool second_pass) > +static int xor_tweak(struct skcipher_request *req, bool second_pass, le128 *t) > { > struct rctx *rctx = skcipher_request_ctx(req); > struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); > const int bs = XTS_BLOCK_SIZE; > struct skcipher_walk w; > - le128 t = rctx->t; > int err; > > if (second_pass) { > @@ -104,6 +114,7 @@ static int xor_tweak(struct skcipher_request *req, bool second_pass) > } > err = skcipher_walk_virt(&w, req, false); > > + *t = rctx->t; > while (w.nbytes) { > unsigned int avail = w.nbytes; > le128 *wsrc; > @@ -113,8 +124,8 @@ static int xor_tweak(struct skcipher_request *req, bool second_pass) > wdst = w.dst.virt.addr; > > do { > - le128_xor(wdst++, &t, wsrc++); > - gf128mul_x_ble(&t, &t); > + le128_xor(wdst++, t, wsrc++); > + gf128mul_x_ble(t, t); > } while ((avail -= bs) >= bs); > > err = skcipher_walk_done(&w, avail); > @@ -123,14 +134,97 @@ static int xor_tweak(struct skcipher_request *req, bool second_pass) > return err; > } > > -static int xor_tweak_pre(struct skcipher_request *req) > +static int xor_tweak_pre(struct skcipher_request *req, le128 *t) > +{ > + return xor_tweak(req, false, t); > +} > + > +static int xor_tweak_post(struct skcipher_request *req, le128 *t) > { > - return xor_tweak(req, false); > + return xor_tweak(req, true, t); > +} > + > +static void encrypt_finish_cts(struct skcipher_request *req) > +{ > + struct rctx *rctx = skcipher_request_ctx(req); > + /* Not a multiple of cipher blocksize, need CTS applied */ > + struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); > + le128 lastblock, lastptext; > + > + /* Handle last partial block - apply Cipher Text Stealing */ > + > + /* Copy last ciphertext block just processed to buffer */ > + sg_pcopy_to_buffer(req->dst, sg_nents(req->dst), &lastblock, > + XTS_BLOCK_SIZE, > + req->cryptlen - XTS_BLOCK_SIZE); > + /* Save last plaintext bytes, next step may overwrite!! */ > + sg_pcopy_to_buffer(req->src, sg_nents(req->src), &lastptext, > + rctx->rem_bytes, req->cryptlen); > + /* Copy first rem_bytes of ciphertext behind last full block */ > + sg_pcopy_from_buffer(req->dst, sg_nents(req->dst), &lastblock, > + rctx->rem_bytes, req->cryptlen); > + /* > + * Copy last remaining bytes of plaintext to combine buffer, > + * replacing part of the ciphertext > + */ > + memcpy(&lastblock, &lastptext, rctx->rem_bytes); > + /* XTS encrypt the combined block */ > + le128_xor(&lastblock, &rctx->tcur, &lastblock); > + crypto_cipher_encrypt_one(ctx->base, (u8 *)&lastblock, > + (u8 *)&lastblock); > + le128_xor(&lastblock, &rctx->tcur, &lastblock); > + /* Write combined block to dst as 2nd last cipherblock */ > + sg_pcopy_from_buffer(req->dst, sg_nents(req->dst), &lastblock, > + XTS_BLOCK_SIZE, > + req->cryptlen - XTS_BLOCK_SIZE); > + > + /* Fix up original request length */ > + req->cryptlen += rctx->rem_bytes; > + return; > } > > -static int xor_tweak_post(struct skcipher_request *req) > +static void decrypt_finish_cts(struct skcipher_request *req) > { > - return xor_tweak(req, true); > + struct rctx *rctx = skcipher_request_ctx(req); > + /* Not a multiple of cipher blocksize, need CTS applied */ > + struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); > + le128 tnext, lastblock, lastctext; > + > + /* Handle last 2 (partial) blocks - apply Cipher Text Stealing */ > + > + /* Copy last full ciphertext block to buffer */ > + sg_pcopy_to_buffer(req->src, sg_nents(req->src), &lastblock, > + XTS_BLOCK_SIZE, req->cryptlen); > + /* Decrypt last full block using *next* tweak */ > + gf128mul_x_ble(&tnext, &rctx->tcur); > + le128_xor(&lastblock, &tnext, &lastblock); > + crypto_cipher_decrypt_one(ctx->base, (u8 *)&lastblock, > + (u8 *)&lastblock); > + le128_xor(&lastblock, &tnext, &lastblock); > + /* Save last ciphertext bytes, next step may overwrite!! */ > + sg_pcopy_to_buffer(req->src, sg_nents(req->src), &lastctext, > + rctx->rem_bytes, req->cryptlen + XTS_BLOCK_SIZE); > + /* Copy first rem_bytes of this ptext as last partial block */ > + sg_pcopy_from_buffer(req->dst, sg_nents(req->dst), &lastblock, > + rctx->rem_bytes, > + req->cryptlen + XTS_BLOCK_SIZE); > + /* > + * Copy last remaining bytes of "plaintext" to combine buffer, > + * replacing part of the ciphertext > + */ > + memcpy(&lastblock, &lastctext, rctx->rem_bytes); > + /* XTS decrypt the combined block */ > + le128_xor(&lastblock, &rctx->tcur, &lastblock); > + crypto_cipher_decrypt_one(ctx->base, (u8 *)&lastblock, > + (u8 *)&lastblock); > + le128_xor(&lastblock, &rctx->tcur, &lastblock); > + /* Write combined block to dst as 2nd last plaintext block */ > + sg_pcopy_from_buffer(req->dst, sg_nents(req->dst), &lastblock, > + XTS_BLOCK_SIZE, req->cryptlen); > + > + /* Fix up original request length */ > + req->cryptlen += rctx->rem_bytes + XTS_BLOCK_SIZE; > + return; > } > > static void crypt_done(struct crypto_async_request *areq, int err) > @@ -139,9 +233,16 @@ static void crypt_done(struct crypto_async_request *areq, int err) > > if (!err) { > struct rctx *rctx = skcipher_request_ctx(req); > + le128 t; > > rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; > - err = xor_tweak_post(req); > + err = xor_tweak_post(req, &t); > + > + if (unlikely(!err && rctx->rem_bytes)) { > + rctx->is_encrypt ? > + encrypt_finish_cts(req) : > + decrypt_finish_cts(req); > + } > } > > skcipher_request_complete(req, err); > @@ -167,10 +268,45 @@ static int encrypt(struct skcipher_request *req) > struct rctx *rctx = skcipher_request_ctx(req); > struct skcipher_request *subreq = &rctx->subreq; > > + /* IEEE P1619 does not allow less data than block cipher blocksize */ > + if (unlikely(req->cryptlen < XTS_BLOCK_SIZE)) > + return -EINVAL; > + > init_crypt(req); > - return xor_tweak_pre(req) ?: > + > + /* valid bytes in last crypto block */ > + rctx->rem_bytes = req->cryptlen & (XTS_BLOCK_SIZE - 1); > + if (unlikely(rctx->rem_bytes)) { > + /* Not a multiple of cipher blocksize, need CTS applied */ > + int err = 0; > + > + /* First process all *full* cipher blocks */ > + req->cryptlen -= rctx->rem_bytes; > + subreq->cryptlen -= rctx->rem_bytes; > + err = xor_tweak_pre(req, &rctx->tcur); > + if (err) > + goto encrypt_exit; > + rctx->is_encrypt = 1; > + err = crypto_skcipher_encrypt(subreq); > + if (err) > + goto encrypt_exit; > + err = xor_tweak_post(req, &rctx->tcur); > + if (err) > + goto encrypt_exit; > + > + encrypt_finish_cts(req); > + return 0; > + > +encrypt_exit: > + /* Fix up original request length */ > + req->cryptlen += rctx->rem_bytes; > + return err; > + } > + > + /* Multiple of cipher blocksize, no CTS required */ > + return xor_tweak_pre(req, &rctx->tcur) ?: > crypto_skcipher_encrypt(subreq) ?: > - xor_tweak_post(req); > + xor_tweak_post(req, &rctx->tcur); > } > > static int decrypt(struct skcipher_request *req) > @@ -178,10 +314,50 @@ static int decrypt(struct skcipher_request *req) > struct rctx *rctx = skcipher_request_ctx(req); > struct skcipher_request *subreq = &rctx->subreq; > > + /* IEEE P1619 does not allow less data than block cipher blocksize */ > + if (unlikely(req->cryptlen < XTS_BLOCK_SIZE)) > + return -EINVAL; > + > init_crypt(req); > - return xor_tweak_pre(req) ?: > + > + /* valid bytes in last crypto block */ > + rctx->rem_bytes = req->cryptlen & (XTS_BLOCK_SIZE - 1); > + if (unlikely(rctx->rem_bytes)) { > + int err = 0; > + > + /* First process all but the last(!) full cipher blocks */ > + req->cryptlen -= rctx->rem_bytes + XTS_BLOCK_SIZE; > + subreq->cryptlen -= rctx->rem_bytes + XTS_BLOCK_SIZE; > + /* May not have any full blocks to process here */ > + if (req->cryptlen) { > + err = xor_tweak_pre(req, &rctx->tcur); > + if (err) > + goto decrypt_exit; > + rctx->is_encrypt = 0; > + err = crypto_skcipher_decrypt(subreq); > + if (err) > + goto decrypt_exit; > + err = xor_tweak_post(req, &rctx->tcur); > + if (err) > + goto decrypt_exit; > + } else { > + /* Start from initial tweak */ > + rctx->tcur = rctx->t; > + } > + > + decrypt_finish_cts(req); > + return 0; > + > +decrypt_exit: > + /* Fix up original request length */ > + req->cryptlen += rctx->rem_bytes + XTS_BLOCK_SIZE; > + return err; > + } > + > + /* Multiple of cipher blocksize, no CTS required */ > + return xor_tweak_pre(req, &rctx->tcur) ?: > crypto_skcipher_decrypt(subreq) ?: > - xor_tweak_post(req); > + xor_tweak_post(req, &rctx->tcur); > } > > static int init_tfm(struct crypto_skcipher *tfm) > @@ -191,6 +367,7 @@ static int init_tfm(struct crypto_skcipher *tfm) > struct priv *ctx = crypto_skcipher_ctx(tfm); > struct crypto_skcipher *child; > struct crypto_cipher *tweak; > + struct crypto_cipher *base; > > child = crypto_spawn_skcipher(&ictx->spawn); > if (IS_ERR(child)) > @@ -206,6 +383,16 @@ static int init_tfm(struct crypto_skcipher *tfm) > > ctx->tweak = tweak; > > + base = crypto_alloc_cipher(ictx->name, 0, 0); > + if (IS_ERR(base)) { > + crypto_free_skcipher(ctx->child); > + crypto_free_cipher(ctx->tweak); > + return PTR_ERR(base); > + } > + > + ctx->base = base; > + > + /* struct rctx expanded by sub cipher request size! */ > crypto_skcipher_set_reqsize(tfm, crypto_skcipher_reqsize(child) + > sizeof(struct rctx)); > > @@ -218,6 +405,7 @@ static void exit_tfm(struct crypto_skcipher *tfm) > > crypto_free_skcipher(ctx->child); > crypto_free_cipher(ctx->tweak); > + crypto_free_cipher(ctx->base); > } > > static void free(struct skcipher_instance *inst) > @@ -314,11 +502,12 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb) > > inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC; > inst->alg.base.cra_priority = alg->base.cra_priority; > - inst->alg.base.cra_blocksize = XTS_BLOCK_SIZE; > + inst->alg.base.cra_blocksize = 1; I don't think this is necessary or correct. > inst->alg.base.cra_alignmask = alg->base.cra_alignmask | > (__alignof__(u64) - 1); > > inst->alg.ivsize = XTS_BLOCK_SIZE; > + inst->alg.chunksize = XTS_BLOCK_SIZE; ... and you don't need this if you drop the above change. > inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg) * 2; > inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg) * 2; > > -- > 1.8.3.1