Ard, Nitpicking: you patch does not fix the comment at the top stating that sector sizes which are not a multiple of 16 bytes are not supported. Otherwise, it works fine over here and I like the way you actually queue up that final cipher call, which largely addresses my performance concerns w.r.t. hardware acceleration :-) > -----Original Message----- > From: Ard Biesheuvel <ard.biesheuvel@xxxxxxxxxx> > Sent: Friday, August 9, 2019 8:31 AM > To: linux-crypto@xxxxxxxxxxxxxxx > Cc: herbert@xxxxxxxxxxxxxxxxxxx; ebiggers@xxxxxxxxxx; Ard Biesheuvel > <ard.biesheuvel@xxxxxxxxxx>; Pascal Van Leeuwen <pvanleeuwen@xxxxxxxxxxxxxx>; Ondrej > Mosnacek <omosnace@xxxxxxxxxx>; Milan Broz <gmazyland@xxxxxxxxx> > Subject: [PATCH] crypto: xts - add support for ciphertext stealing > > Add support for the missing ciphertext stealing part of the XTS-AES > specification, which permits inputs of any size >= the block size. > > Cc: Pascal van Leeuwen <pvanleeuwen@xxxxxxxxxxxxxx> > Cc: Ondrej Mosnacek <omosnace@xxxxxxxxxx> > Cc: Milan Broz <gmazyland@xxxxxxxxx> > Signed-off-by: Ard Biesheuvel <ard.biesheuvel@xxxxxxxxxx> Tested-by: Pascal van Leeuwen <pvanleeuwen@xxxxxxxxxxxxxx> > --- > This is an alternative approach to Pascal's [0]: instead of instantiating > a separate cipher to deal with the tail, invoke the same ECB skcipher used > for the bulk of the data. > > [0] https://lore.kernel.org/linux-crypto/1565245094-8584-1-git-send-email- > pvanleeuwen@xxxxxxxxxxxxxx/ > > crypto/xts.c | 148 +++++++++++++++++--- > 1 file changed, 130 insertions(+), 18 deletions(-) > > diff --git a/crypto/xts.c b/crypto/xts.c > index 11211003db7e..fc9edc6eb11e 100644 > --- a/crypto/xts.c > +++ b/crypto/xts.c > @@ -34,6 +34,7 @@ struct xts_instance_ctx { > > struct rctx { > le128 t; > + struct scatterlist sg[2]; > struct skcipher_request subreq; > }; > > @@ -84,10 +85,11 @@ static int setkey(struct crypto_skcipher *parent, const u8 *key, > * mutliple calls to the 'ecb(..)' instance, which usually would be slower than > * just doing the gf128mul_x_ble() calls again. > */ > -static int xor_tweak(struct skcipher_request *req, bool second_pass) > +static int xor_tweak(struct skcipher_request *req, bool second_pass, bool enc) > { > struct rctx *rctx = skcipher_request_ctx(req); > struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); > + const bool cts = (req->cryptlen % XTS_BLOCK_SIZE); > const int bs = XTS_BLOCK_SIZE; > struct skcipher_walk w; > le128 t = rctx->t; > @@ -109,6 +111,20 @@ static int xor_tweak(struct skcipher_request *req, bool second_pass) > wdst = w.dst.virt.addr; > > do { > + if (unlikely(cts) && > + w.total - w.nbytes + avail < 2 * XTS_BLOCK_SIZE) { > + if (!enc) { > + if (second_pass) > + rctx->t = t; > + gf128mul_x_ble(&t, &t); > + } > + le128_xor(wdst, &t, wsrc); > + if (enc && second_pass) > + gf128mul_x_ble(&rctx->t, &t); > + skcipher_walk_done(&w, avail - bs); > + return 0; > + } > + > le128_xor(wdst++, &t, wsrc++); > gf128mul_x_ble(&t, &t); > } while ((avail -= bs) >= bs); > @@ -119,17 +135,70 @@ static int xor_tweak(struct skcipher_request *req, bool second_pass) > return err; > } > > -static int xor_tweak_pre(struct skcipher_request *req) > +static int xor_tweak_pre(struct skcipher_request *req, bool enc) > { > - return xor_tweak(req, false); > + return xor_tweak(req, false, enc); > } > > -static int xor_tweak_post(struct skcipher_request *req) > +static int xor_tweak_post(struct skcipher_request *req, bool enc) > { > - return xor_tweak(req, true); > + return xor_tweak(req, true, enc); > } > > -static void crypt_done(struct crypto_async_request *areq, int err) > +static void cts_done(struct crypto_async_request *areq, int err) > +{ > + struct skcipher_request *req = areq->data; > + le128 b; > + > + if (!err) { > + struct rctx *rctx = skcipher_request_ctx(req); > + > + scatterwalk_map_and_copy(&b, rctx->sg, 0, XTS_BLOCK_SIZE, 0); > + le128_xor(&b, &rctx->t, &b); > + scatterwalk_map_and_copy(&b, rctx->sg, 0, XTS_BLOCK_SIZE, 1); > + } > + > + skcipher_request_complete(req, err); > +} > + > +static int cts_final(struct skcipher_request *req, > + int (*crypt)(struct skcipher_request *req)) > +{ > + struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); > + int offset = req->cryptlen & ~(XTS_BLOCK_SIZE - 1); > + struct rctx *rctx = skcipher_request_ctx(req); > + struct skcipher_request *subreq = &rctx->subreq; > + int tail = req->cryptlen % XTS_BLOCK_SIZE; > + struct scatterlist *sg; > + le128 b[2]; > + int err; > + > + sg = scatterwalk_ffwd(rctx->sg, req->dst, offset - XTS_BLOCK_SIZE); > + > + scatterwalk_map_and_copy(b, sg, 0, XTS_BLOCK_SIZE, 0); > + memcpy(b + 1, b, tail); > + scatterwalk_map_and_copy(b, req->src, offset, tail, 0); > + > + le128_xor(b, &rctx->t, b); > + > + scatterwalk_map_and_copy(b, sg, 0, XTS_BLOCK_SIZE + tail, 1); > + > + skcipher_request_set_tfm(subreq, ctx->child); > + skcipher_request_set_callback(subreq, req->base.flags, cts_done, req); > + skcipher_request_set_crypt(subreq, sg, sg, XTS_BLOCK_SIZE, NULL); > + > + err = crypt(subreq); > + if (err) > + return err; > + > + scatterwalk_map_and_copy(b, sg, 0, XTS_BLOCK_SIZE, 0); > + le128_xor(b, &rctx->t, b); > + scatterwalk_map_and_copy(b, sg, 0, XTS_BLOCK_SIZE, 1); > + > + return 0; > +} > + > +static void encrypt_done(struct crypto_async_request *areq, int err) > { > struct skcipher_request *req = areq->data; > > @@ -137,47 +206,90 @@ static void crypt_done(struct crypto_async_request *areq, int err) > struct rctx *rctx = skcipher_request_ctx(req); > > rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; > - err = xor_tweak_post(req); > + err = xor_tweak_post(req, true); > + > + if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) { > + err = cts_final(req, crypto_skcipher_encrypt); > + if (err == -EINPROGRESS) > + return; > + } > } > > skcipher_request_complete(req, err); > } > > -static void init_crypt(struct skcipher_request *req) > +static void decrypt_done(struct crypto_async_request *areq, int err) > +{ > + struct skcipher_request *req = areq->data; > + > + if (!err) { > + struct rctx *rctx = skcipher_request_ctx(req); > + > + rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; > + err = xor_tweak_post(req, false); > + > + if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) { > + err = cts_final(req, crypto_skcipher_decrypt); > + if (err == -EINPROGRESS) > + return; > + } > + } > + > + skcipher_request_complete(req, err); > +} > + > +static int init_crypt(struct skcipher_request *req, crypto_completion_t compl) > { > struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); > struct rctx *rctx = skcipher_request_ctx(req); > struct skcipher_request *subreq = &rctx->subreq; > > + if (req->cryptlen < XTS_BLOCK_SIZE) > + return -EINVAL; > + > skcipher_request_set_tfm(subreq, ctx->child); > - skcipher_request_set_callback(subreq, req->base.flags, crypt_done, req); > + skcipher_request_set_callback(subreq, req->base.flags, compl, req); > skcipher_request_set_crypt(subreq, req->dst, req->dst, > - req->cryptlen, NULL); > + req->cryptlen & ~(XTS_BLOCK_SIZE - 1), NULL); > > /* calculate first value of T */ > crypto_cipher_encrypt_one(ctx->tweak, (u8 *)&rctx->t, req->iv); > + > + return 0; > } > > static int encrypt(struct skcipher_request *req) > { > struct rctx *rctx = skcipher_request_ctx(req); > struct skcipher_request *subreq = &rctx->subreq; > + int err; > > - init_crypt(req); > - return xor_tweak_pre(req) ?: > - crypto_skcipher_encrypt(subreq) ?: > - xor_tweak_post(req); > + err = init_crypt(req, encrypt_done) ?: > + xor_tweak_pre(req, true) ?: > + crypto_skcipher_encrypt(subreq) ?: > + xor_tweak_post(req, true); > + > + if (err || likely((req->cryptlen % XTS_BLOCK_SIZE) == 0)) > + return err; > + > + return cts_final(req, crypto_skcipher_encrypt); > } > > static int decrypt(struct skcipher_request *req) > { > struct rctx *rctx = skcipher_request_ctx(req); > struct skcipher_request *subreq = &rctx->subreq; > + int err; > + > + err = init_crypt(req, decrypt_done) ?: > + xor_tweak_pre(req, false) ?: > + crypto_skcipher_decrypt(subreq) ?: > + xor_tweak_post(req, false); > + > + if (err || likely((req->cryptlen % XTS_BLOCK_SIZE) == 0)) > + return err; > > - init_crypt(req); > - return xor_tweak_pre(req) ?: > - crypto_skcipher_decrypt(subreq) ?: > - xor_tweak_post(req); > + return cts_final(req, crypto_skcipher_decrypt); > } > > static int init_tfm(struct crypto_skcipher *tfm) > -- > 2.17.1 Regards, Pascal van Leeuwen Silicon IP Architect, Multi-Protocol Engines @ Verimatrix www.insidesecure.com