> -----Original Message-----
> From: linux-crypto-owner@xxxxxxxxxxxxxxx <linux-crypto-owner@xxxxxxxxxxxxxxx> On Behalf Of Pascal Van Leeuwen
> Sent: Friday, August 9, 2019 12:22 PM
> To: Ard Biesheuvel <ard.biesheuvel@xxxxxxxxxx>; linux-crypto@xxxxxxxxxxxxxxx
> Cc: herbert@xxxxxxxxxxxxxxxxxxx; ebiggers@xxxxxxxxxx; Ondrej Mosnacek <omosnace@xxxxxxxxxx>; Milan Broz <gmazyland@xxxxxxxxx>
> Subject: RE: [PATCH] crypto: xts - add support for ciphertext stealing
>
> Ard,
>
> Nitpicking: your patch does not fix the comment at the top of the file,
> which still states that sector sizes that are not a multiple of 16 bytes
> are unsupported.
>
> Otherwise, it works fine over here, and I like the way you actually queue
> up that final cipher call, which largely addresses my performance concerns
> w.r.t. hardware acceleration :-)
>

Actually, I just noticed it did NOT work fine: the first CTS vector (5) was
failing. Sorry for missing that little detail before.

Setting cra_blocksize to 1 instead of 16 solves that issue. Are you still
sure cra_blocksize should be set to 16? Because to me, that does not make
sense for something that is fundamentally NOT a block cipher.
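For reference, the one-liner I tested on top of your patch is just this, in
create() in crypto/xts.c (a sketch of my local experiment, not a proper
submission):

-	inst->alg.base.cra_blocksize = XTS_BLOCK_SIZE;
+	inst->alg.base.cra_blocksize = 1;

With that in place, the failing vector passes here.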
> > -----Original Message-----
> > From: Ard Biesheuvel <ard.biesheuvel@xxxxxxxxxx>
> > Sent: Friday, August 9, 2019 8:31 AM
> > To: linux-crypto@xxxxxxxxxxxxxxx
> > Cc: herbert@xxxxxxxxxxxxxxxxxxx; ebiggers@xxxxxxxxxx; Ard Biesheuvel <ard.biesheuvel@xxxxxxxxxx>; Pascal Van Leeuwen <pvanleeuwen@xxxxxxxxxxxxxx>; Ondrej Mosnacek <omosnace@xxxxxxxxxx>; Milan Broz <gmazyland@xxxxxxxxx>
> > Subject: [PATCH] crypto: xts - add support for ciphertext stealing
> >
> > Add support for the missing ciphertext stealing part of the XTS-AES
> > specification, which permits inputs of any size >= the block size.
> >
> > Cc: Pascal van Leeuwen <pvanleeuwen@xxxxxxxxxxxxxx>
> > Cc: Ondrej Mosnacek <omosnace@xxxxxxxxxx>
> > Cc: Milan Broz <gmazyland@xxxxxxxxx>
> > Signed-off-by: Ard Biesheuvel <ard.biesheuvel@xxxxxxxxxx>
> > Tested-by: Pascal van Leeuwen <pvanleeuwen@xxxxxxxxxxxxxx>
>
> Eh ... tested yes ... working ... no ...
>
> > ---
> > This is an alternative approach to Pascal's [0]: instead of instantiating
> > a separate cipher to deal with the tail, invoke the same ECB skcipher used
> > for the bulk of the data.
> >
> > [0] https://lore.kernel.org/linux-crypto/1565245094-8584-1-git-send-email-pvanleeuwen@xxxxxxxxxxxxxx/
> >
> >  crypto/xts.c | 148 +++++++++++++++++---
> >  1 file changed, 130 insertions(+), 18 deletions(-)
> >
> > diff --git a/crypto/xts.c b/crypto/xts.c
> > index 11211003db7e..fc9edc6eb11e 100644
> > --- a/crypto/xts.c
> > +++ b/crypto/xts.c
> > @@ -34,6 +34,7 @@ struct xts_instance_ctx {
> >
> >  struct rctx {
> >  	le128 t;
> > +	struct scatterlist sg[2];
> >  	struct skcipher_request subreq;
> >  };
> >
> > @@ -84,10 +85,11 @@ static int setkey(struct crypto_skcipher *parent, const u8 *key,
> >   * mutliple calls to the 'ecb(..)' instance, which usually would be slower than
> >   * just doing the gf128mul_x_ble() calls again.
> >   */
> > -static int xor_tweak(struct skcipher_request *req, bool second_pass)
> > +static int xor_tweak(struct skcipher_request *req, bool second_pass, bool enc)
> >  {
> >  	struct rctx *rctx = skcipher_request_ctx(req);
> >  	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
> > +	const bool cts = (req->cryptlen % XTS_BLOCK_SIZE);
> >  	const int bs = XTS_BLOCK_SIZE;
> >  	struct skcipher_walk w;
> >  	le128 t = rctx->t;
> > @@ -109,6 +111,20 @@ static int xor_tweak(struct skcipher_request *req, bool second_pass)
> >  		wdst = w.dst.virt.addr;
> >
> >  		do {
> > +			if (unlikely(cts) &&
> > +			    w.total - w.nbytes + avail < 2 * XTS_BLOCK_SIZE) {
> > +				if (!enc) {
> > +					if (second_pass)
> > +						rctx->t = t;
> > +					gf128mul_x_ble(&t, &t);
> > +				}
> > +				le128_xor(wdst, &t, wsrc);
> > +				if (enc && second_pass)
> > +					gf128mul_x_ble(&rctx->t, &t);
> > +				skcipher_walk_done(&w, avail - bs);
> > +				return 0;
> > +			}
> > +
> >  			le128_xor(wdst++, &t, wsrc++);
> >  			gf128mul_x_ble(&t, &t);
> >  		} while ((avail -= bs) >= bs);
> > @@ -119,17 +135,70 @@ static int xor_tweak(struct skcipher_request *req, bool second_pass)
> >  	return err;
> >  }
> >
> > -static int xor_tweak_pre(struct skcipher_request *req)
> > +static int xor_tweak_pre(struct skcipher_request *req, bool enc)
> >  {
> > -	return xor_tweak(req, false);
> > +	return xor_tweak(req, false, enc);
> >  }
> >
> > -static int xor_tweak_post(struct skcipher_request *req)
> > +static int xor_tweak_post(struct skcipher_request *req, bool enc)
> >  {
> > -	return xor_tweak(req, true);
> > +	return xor_tweak(req, true, enc);
> >  }
> >
> > -static void crypt_done(struct crypto_async_request *areq, int err)
> > +static void cts_done(struct crypto_async_request *areq, int err)
> > +{
> > +	struct skcipher_request *req = areq->data;
> > +	le128 b;
> > +
> > +	if (!err) {
> > +		struct rctx *rctx = skcipher_request_ctx(req);
> > +
> > +		scatterwalk_map_and_copy(&b, rctx->sg, 0, XTS_BLOCK_SIZE, 0);
> > +		le128_xor(&b, &rctx->t, &b);
> > +		scatterwalk_map_and_copy(&b, rctx->sg, 0, XTS_BLOCK_SIZE, 1);
> > +	}
> > +
> > +	skcipher_request_complete(req, err);
> > +}
> > +
> > +static int cts_final(struct skcipher_request *req,
> > +		     int (*crypt)(struct skcipher_request *req))
> > +{
> > +	struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
> > +	int offset = req->cryptlen & ~(XTS_BLOCK_SIZE - 1);
> > +	struct rctx *rctx = skcipher_request_ctx(req);
> > +	struct skcipher_request *subreq = &rctx->subreq;
> > +	int tail = req->cryptlen % XTS_BLOCK_SIZE;
> > +	struct scatterlist *sg;
> > +	le128 b[2];
> > +	int err;
> > +
> > +	sg = scatterwalk_ffwd(rctx->sg, req->dst, offset - XTS_BLOCK_SIZE);
> > +
> > +	scatterwalk_map_and_copy(b, sg, 0, XTS_BLOCK_SIZE, 0);
> > +	memcpy(b + 1, b, tail);
> > +	scatterwalk_map_and_copy(b, req->src, offset, tail, 0);
> > +
> > +	le128_xor(b, &rctx->t, b);
> > +
> > +	scatterwalk_map_and_copy(b, sg, 0, XTS_BLOCK_SIZE + tail, 1);
> > +
> > +	skcipher_request_set_tfm(subreq, ctx->child);
> > +	skcipher_request_set_callback(subreq, req->base.flags, cts_done, req);
> > +	skcipher_request_set_crypt(subreq, sg, sg, XTS_BLOCK_SIZE, NULL);
> > +
> > +	err = crypt(subreq);
> > +	if (err)
> > +		return err;
> > +
> > +	scatterwalk_map_and_copy(b, sg, 0, XTS_BLOCK_SIZE, 0);
> > +	le128_xor(b, &rctx->t, b);
> > +	scatterwalk_map_and_copy(b, sg, 0, XTS_BLOCK_SIZE, 1);
> > +
> > +	return 0;
> > +}
> > +
> > +static void encrypt_done(struct crypto_async_request *areq, int err)
> >  {
> >  	struct skcipher_request *req = areq->data;
> >
> > @@ -137,47 +206,90 @@ static void crypt_done(struct crypto_async_request *areq, int err)
> >  		struct rctx *rctx = skcipher_request_ctx(req);
> >
> >  		rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
> > -		err = xor_tweak_post(req);
> > +		err = xor_tweak_post(req, true);
> > +
> > +		if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) {
> > +			err = cts_final(req, crypto_skcipher_encrypt);
> > +			if (err == -EINPROGRESS)
> > +				return;
> > +		}
> >  	}
> >
> >  	skcipher_request_complete(req, err);
> >  }
> >
> > -static void init_crypt(struct skcipher_request *req)
> > +static void decrypt_done(struct crypto_async_request *areq, int err)
> > +{
> > +	struct skcipher_request *req = areq->data;
> > +
> > +	if (!err) {
> > +		struct rctx *rctx = skcipher_request_ctx(req);
> > +
> > +		rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
> > +		err = xor_tweak_post(req, false);
> > +
> > +		if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) {
> > +			err = cts_final(req, crypto_skcipher_decrypt);
> > +			if (err == -EINPROGRESS)
> > +				return;
> > +		}
> > +	}
> > +
> > +	skcipher_request_complete(req, err);
> > +}
> > +
> > +static int init_crypt(struct skcipher_request *req, crypto_completion_t compl)
> >  {
> >  	struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
> >  	struct rctx *rctx = skcipher_request_ctx(req);
> >  	struct skcipher_request *subreq = &rctx->subreq;
> >
> > +	if (req->cryptlen < XTS_BLOCK_SIZE)
> > +		return -EINVAL;
> > +
> >  	skcipher_request_set_tfm(subreq, ctx->child);
> > -	skcipher_request_set_callback(subreq, req->base.flags, crypt_done, req);
> > +	skcipher_request_set_callback(subreq, req->base.flags, compl, req);
> >  	skcipher_request_set_crypt(subreq, req->dst, req->dst,
> > -				   req->cryptlen, NULL);
> > +				   req->cryptlen & ~(XTS_BLOCK_SIZE - 1), NULL);
> >
> >  	/* calculate first value of T */
> >  	crypto_cipher_encrypt_one(ctx->tweak, (u8 *)&rctx->t, req->iv);
> > +
> > +	return 0;
> >  }
> >
> >  static int encrypt(struct skcipher_request *req)
> >  {
> >  	struct rctx *rctx = skcipher_request_ctx(req);
> >  	struct skcipher_request *subreq = &rctx->subreq;
> > +	int err;
> >
> > -	init_crypt(req);
> > -	return xor_tweak_pre(req) ?:
> > -		crypto_skcipher_encrypt(subreq) ?:
> > -		xor_tweak_post(req);
> > +	err = init_crypt(req, encrypt_done) ?:
> > +	      xor_tweak_pre(req, true) ?:
> > +	      crypto_skcipher_encrypt(subreq) ?:
> > +	      xor_tweak_post(req, true);
> > +
> > +	if (err || likely((req->cryptlen % XTS_BLOCK_SIZE) == 0))
> > +		return err;
> > +
> > +	return cts_final(req, crypto_skcipher_encrypt);
> >  }
> >
> >  static int decrypt(struct skcipher_request *req)
> >  {
> >  	struct rctx *rctx = skcipher_request_ctx(req);
> >  	struct skcipher_request *subreq = &rctx->subreq;
> > +	int err;
> > +
> > +	err = init_crypt(req, decrypt_done) ?:
> > +	      xor_tweak_pre(req, false) ?:
> > +	      crypto_skcipher_decrypt(subreq) ?:
> > +	      xor_tweak_post(req, false);
> > +
> > +	if (err || likely((req->cryptlen % XTS_BLOCK_SIZE) == 0))
> > +		return err;
> >
> > -	init_crypt(req);
> > -	return xor_tweak_pre(req) ?:
> > -		crypto_skcipher_decrypt(subreq) ?:
> > -		xor_tweak_post(req);
> > +	return cts_final(req, crypto_skcipher_decrypt);
> >  }
> >
> >  static int init_tfm(struct crypto_skcipher *tfm)
> > --
> > 2.17.1
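By the way, for anyone trying to follow what the tail handling actually does
to the data: below is my reading of the XTS ciphertext stealing step from the
IEEE P1619 spec, written out as a standalone user-space sketch (encrypt side
only). ecb_encrypt_block() is a made-up toy stand-in for the real ecb(aes)
child cipher, and the tweak values are simply passed in, so treat this purely
as an illustration of the data movement, NOT as the kernel code:

#include <stdint.h>
#include <stdio.h>
#include <stddef.h>
#include <string.h>

#define BS 16	/* XTS_BLOCK_SIZE */

/* Toy stand-in so the sketch compiles and runs - NOT a real cipher! */
static void ecb_encrypt_block(uint8_t dst[BS], const uint8_t src[BS])
{
	for (size_t i = 0; i < BS; i++)
		dst[i] = src[i] ^ 0xa5;
}

/* One XTS block: C = E(P ^ T) ^ T */
static void xts_block(uint8_t dst[BS], const uint8_t src[BS],
		      const uint8_t twk[BS])
{
	uint8_t buf[BS];

	for (size_t i = 0; i < BS; i++)
		buf[i] = src[i] ^ twk[i];
	ecb_encrypt_block(buf, buf);
	for (size_t i = 0; i < BS; i++)
		dst[i] = buf[i] ^ twk[i];
}

/*
 * Encrypt the final BS + tail bytes of a message, with 0 < tail < BS.
 * twk1 is the tweak for the last full block and twk2 the next tweak
 * value (what rctx->t holds by the time cts_final() runs).
 */
static void xts_cts_encrypt_tail(uint8_t *dst, const uint8_t *src,
				 size_t tail, const uint8_t twk1[BS],
				 const uint8_t twk2[BS])
{
	uint8_t cc[BS], b[BS];

	/* Encrypt the last full plaintext block as usual. */
	xts_block(cc, src, twk1);

	/* Its first 'tail' bytes become the final, partial output block. */
	memcpy(dst + BS, cc, tail);

	/* "Steal" the remaining ciphertext to pad the plaintext tail ... */
	memcpy(b, src + BS, tail);
	memcpy(b + tail, cc + tail, BS - tail);

	/* ... and re-encrypt with the next tweak: the penultimate block. */
	xts_block(dst, b, twk2);
}

int main(void)
{
	uint8_t t1[BS] = { 1 }, t2[BS] = { 2 };
	uint8_t src[BS + 5] = "0123456789abcdefghij";	/* 16 + 5 bytes */
	uint8_t dst[BS + 5];

	xts_cts_encrypt_tail(dst, src, 5, t1, t2);
	for (size_t i = 0; i < sizeof(dst); i++)
		printf("%02x", dst[i]);
	printf("\n");
	return 0;
}

Note that in your patch the xts_block() step for the last full block is
already done by the bulk xor_tweak()/ECB pass, so cts_final() only has to do
the stealing plus one extra block encryption - which is exactly why that
final cipher call can be queued as a separate request.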
>
> Regards,
> Pascal van Leeuwen
> Silicon IP Architect, Multi-Protocol Engines @ Verimatrix
> www.insidesecure.com

Regards,
Pascal van Leeuwen
Silicon IP Architect, Multi-Protocol Engines @ Verimatrix
www.insidesecure.com