Hi,

I figured I'd push this patch to the list for comments. It stems from recent
work on the linux-crypto/linuxppc-dev lists regarding AES-XTS support for the
Freescale Talitos engine. The background can be found in this message and the
rest of the thread:

http://marc.info/?l=linux-crypto-vger&m=142533320730893&w=2

The AES-XTS mode in Talitos accepts a starting IV (usually the sector number),
and the hardware increments it as each new sector is encrypted/decrypted. This
allowed me to investigate not only dispatching 4kB IOs, but also extending the
scatter/gather lists to cover as much IO as Talitos can handle in a single
request (64kB - 1, which rounds down to 60kB of whole pages).

The performance numbers I quoted originally follow, with an extra line added
for larger IOs of up to 60kB:

                        Write (MB/s)   Read (MB/s)
  Unencrypted                 140            176
  aes-xts-plain64 4kB         113            115
  aes-xts-plain64 512b         71             56
  aes-xts-plain64 60kB        120            132

With IOs of up to 60kB, the performance gets even closer to the unencrypted
values.

It's certainly not a pretty patch, but as a proof of concept it does work.
I avoided the IV issues by just ifdeffing them out...
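For reference, here is a small standalone sketch of the size arithmetic the
patch relies on (not part of the patch itself), assuming 4kB pages and
512-byte sectors; the constant name and the round_down()/DIV_ROUND_UP()
helpers are defined locally just so the sketch builds in userspace:

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define SECTOR_SHIFT	9
#define TALITOS_MAX	65535UL		/* 64kB - 1, max bytes per request */

#define round_down(x, y)	((x) - ((x) % (y)))
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned long block_size = round_down(TALITOS_MAX, PAGE_SIZE);

	/* 61440 bytes, i.e. the 60kB figure quoted above */
	printf("max request:    %lu bytes\n", block_size);
	/* 15 scatter/gather entries per full-size request */
	printf("sg entries:     %lu\n", DIV_ROUND_UP(block_size, PAGE_SIZE));
	/* a full 60kB request advances the sector IV by 120 sectors */
	printf("sector advance: %lu\n", block_size >> SECTOR_SHIFT);
	return 0;
}

The last figure is the per-request advance that the patch applies to
ctx->cc_sector in crypt_convert().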
mh

-- 
Martin Hicks P.Eng.    |    mort@xxxxxxxx
Bork Consulting Inc.   |    +1 (613) 266-2296

From 67a335757a5d028e10dbac6f78cca0ab89871157 Mon Sep 17 00:00:00 2001
From: Martin Hicks <mort@xxxxxxxx>
Date: Wed, 25 Mar 2015 15:38:22 -0400
Subject: [PATCH] dm-crypt: Issue large IOs to crypto engines

This is a WIP patch to issue larger-than-sector-size IOs to the Talitos
Engine, which implements aes-xts.
---
 drivers/md/dm-crypt.c | 134 ++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 111 insertions(+), 23 deletions(-)

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 08981be..a15b19f 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -42,6 +42,7 @@ struct convert_context {
 	struct bvec_iter iter_out;
 	sector_t cc_sector;
 	atomic_t cc_pending;
+	unsigned int block_size;
 	struct ablkcipher_request *req;
 };
 
@@ -63,8 +64,10 @@ struct dm_crypt_io {
 
 struct dm_crypt_request {
 	struct convert_context *ctx;
-	struct scatterlist sg_in;
-	struct scatterlist sg_out;
+	struct sg_table sg_in;
+	struct sg_table sg_out;
+	struct scatterlist sgl_in;
+	struct scatterlist sgl_out;
 	sector_t iv_sector;
 };
 
@@ -142,6 +145,8 @@ struct crypt_config {
 	sector_t iv_offset;
 	unsigned int iv_size;
 
+	unsigned int block_size;
+
 	/* ESSIV: struct crypto_cipher *essiv_tfm */
 	void *iv_private;
 	struct crypto_ablkcipher **tfms;
@@ -459,6 +464,8 @@ static int crypt_iv_null_gen(struct crypt_config *cc, u8 *iv,
 	return 0;
 }
 
+#if 0
+
 static void crypt_iv_lmk_dtr(struct crypt_config *cc)
 {
 	struct iv_lmk_private *lmk = &cc->iv_gen_private.lmk;
@@ -577,7 +584,7 @@ static int crypt_iv_lmk_gen(struct crypt_config *cc, u8 *iv,
 	int r = 0;
 
 	if (bio_data_dir(dmreq->ctx->bio_in) == WRITE) {
-		src = kmap_atomic(sg_page(&dmreq->sg_in));
+		src = kmap_atomic(sg_page(dmreq->sg_in->sgl));
 		r = crypt_iv_lmk_one(cc, iv, dmreq, src + dmreq->sg_in.offset);
 		kunmap_atomic(src);
 	} else
@@ -595,7 +602,7 @@ static int crypt_iv_lmk_post(struct crypt_config *cc, u8 *iv,
 	if (bio_data_dir(dmreq->ctx->bio_in) == WRITE)
 		return 0;
 
-	dst = kmap_atomic(sg_page(&dmreq->sg_out));
+	dst = kmap_atomic(sg_page(&dmreq->sg_out->sgl));
 	r = crypt_iv_lmk_one(cc, iv, dmreq, dst + dmreq->sg_out.offset);
 
 	/* Tweak the first block of plaintext sector */
@@ -719,7 +726,7 @@ static int crypt_iv_tcw_gen(struct crypt_config *cc, u8 *iv,
 
 	/* Remove whitening from ciphertext */
 	if (bio_data_dir(dmreq->ctx->bio_in) != WRITE) {
-		src = kmap_atomic(sg_page(&dmreq->sg_in));
+		src = kmap_atomic(sg_page(dmreq->sg_in->sgl));
 		r = crypt_iv_tcw_whitening(cc, dmreq, src + dmreq->sg_in.offset);
 		kunmap_atomic(src);
 	}
@@ -749,6 +756,7 @@ static int crypt_iv_tcw_post(struct crypt_config *cc, u8 *iv,
 
 	return r;
 }
+#endif
 
 static struct crypt_iv_operations crypt_iv_plain_ops = {
 	.generator = crypt_iv_plain_gen
@@ -776,6 +784,7 @@ static struct crypt_iv_operations crypt_iv_null_ops = {
 	.generator = crypt_iv_null_gen
 };
 
+#if 0
 static struct crypt_iv_operations crypt_iv_lmk_ops = {
 	.ctr = crypt_iv_lmk_ctr,
 	.dtr = crypt_iv_lmk_dtr,
@@ -793,6 +802,7 @@ static struct crypt_iv_operations crypt_iv_tcw_ops = {
 	.generator = crypt_iv_tcw_gen,
 	.post = crypt_iv_tcw_post
 };
+#endif
 
 static void crypt_convert_init(struct crypt_config *cc,
 			       struct convert_context *ctx,
@@ -801,10 +811,17 @@ static void crypt_convert_init(struct crypt_config *cc,
 {
 	ctx->bio_in = bio_in;
 	ctx->bio_out = bio_out;
-	if (bio_in)
+	ctx->block_size = 0;
+	if (bio_in) {
 		ctx->iter_in = bio_in->bi_iter;
-	if (bio_out)
+		ctx->block_size = max(ctx->block_size, bio_in->bi_iter.bi_size);
+	}
+	if (bio_out) {
 		ctx->iter_out = bio_out->bi_iter;
+		ctx->block_size = max(ctx->block_size, bio_out->bi_iter.bi_size);
+	}
+	if (ctx->block_size > cc->block_size)
+		ctx->block_size = cc->block_size;
 	ctx->cc_sector = sector + cc->iv_offset;
 	init_completion(&ctx->restart);
 }
@@ -835,33 +852,70 @@ static int crypt_convert_block(struct crypt_config *cc,
 	struct bio_vec bv_in = bio_iter_iovec(ctx->bio_in, ctx->iter_in);
 	struct bio_vec bv_out = bio_iter_iovec(ctx->bio_out, ctx->iter_out);
 	struct dm_crypt_request *dmreq;
+	int sg_count;
+	unsigned long block_size, request_size = 0;
 	u8 *iv;
-	int r;
+	int i, r;
 
 	dmreq = dmreq_of_req(cc, req);
 	iv = iv_of_dmreq(cc, dmreq);
 
 	dmreq->iv_sector = ctx->cc_sector;
 	dmreq->ctx = ctx;
-	sg_init_table(&dmreq->sg_in, 1);
-	sg_set_page(&dmreq->sg_in, bv_in.bv_page, 1 << SECTOR_SHIFT,
-		    bv_in.bv_offset);
 
-	sg_init_table(&dmreq->sg_out, 1);
-	sg_set_page(&dmreq->sg_out, bv_out.bv_page, 1 << SECTOR_SHIFT,
-		    bv_out.bv_offset);
+	block_size = ctx->block_size;
+	if (ctx->iter_in.bi_size < block_size)
+		block_size = ctx->iter_in.bi_size;
+	if (ctx->iter_out.bi_size < block_size)
+		block_size = ctx->iter_out.bi_size;
+	sg_count = DIV_ROUND_UP(block_size, PAGE_SIZE);
 
-	bio_advance_iter(ctx->bio_in, &ctx->iter_in, 1 << SECTOR_SHIFT);
-	bio_advance_iter(ctx->bio_out, &ctx->iter_out, 1 << SECTOR_SHIFT);
+	if (sg_count == 1) {
+		sg_init_table(&dmreq->sgl_in, 1);
+		sg_init_table(&dmreq->sgl_out, 1);
+	} else {
+		r = sg_alloc_table(&dmreq->sg_in, sg_count, GFP_NOIO);
+		r += sg_alloc_table(&dmreq->sg_out, sg_count, GFP_NOIO);
+		if (r)
+			goto out_error;
+	}
+
+	for (i = 0; i < sg_count; i++) {
+		int len = min(block_size, PAGE_SIZE);
+		if (len > ctx->iter_in.bi_size)
+			len = ctx->iter_in.bi_size;
+		if (len > ctx->iter_out.bi_size)
+			len = ctx->iter_out.bi_size;
+
+		block_size -= len;
+		request_size += len;
+
+		if (sg_count == 1) {
+			sg_set_page(&dmreq->sgl_in, bv_in.bv_page, len, bv_in.bv_offset);
+			sg_set_page(&dmreq->sgl_out, bv_out.bv_page, len, bv_out.bv_offset);
+		} else {
+			sg_set_page(&dmreq->sg_in.sgl[i], bv_in.bv_page, len,
+				    bv_in.bv_offset);
+			sg_set_page(&dmreq->sg_out.sgl[i], bv_out.bv_page, len,
+				    bv_out.bv_offset);
+		}
+
+		bio_advance_iter(ctx->bio_in, &ctx->iter_in, len);
+		bio_advance_iter(ctx->bio_out, &ctx->iter_out, len);
+	}
 
 	if (cc->iv_gen_ops) {
 		r = cc->iv_gen_ops->generator(cc, iv, dmreq);
 		if (r < 0)
-			return r;
+			goto out_error;
 	}
 
-	ablkcipher_request_set_crypt(req, &dmreq->sg_in, &dmreq->sg_out,
-				     1 << SECTOR_SHIFT, iv);
+	if (sg_count == 1)
+		ablkcipher_request_set_crypt(req, &dmreq->sgl_in, &dmreq->sgl_out,
+					     request_size, iv);
+	else
+		ablkcipher_request_set_crypt(req, dmreq->sg_in.sgl, dmreq->sg_out.sgl,
+					     request_size, iv);
 
 	if (bio_data_dir(ctx->bio_in) == WRITE)
 		r = crypto_ablkcipher_encrypt(req);
@@ -871,6 +925,12 @@ static int crypt_convert_block(struct crypt_config *cc,
 
 	if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post)
 		r = cc->iv_gen_ops->post(cc, iv, dmreq);
 
+out_error:
+	if (r && sg_count > 1) {
+		sg_free_table(&dmreq->sg_in);
+		sg_free_table(&dmreq->sg_out);
+	}
+
 	return r;
 }
 
@@ -881,10 +941,14 @@ static void crypt_alloc_req(struct crypt_config *cc,
 			     struct convert_context *ctx)
 {
 	unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1);
+	struct dm_crypt_request *dmreq;
 
 	if (!ctx->req)
 		ctx->req = mempool_alloc(cc->req_pool, GFP_NOIO);
 
+	dmreq = dmreq_of_req(cc, ctx->req);
+	memset(dmreq, 0, sizeof(struct dm_crypt_request));
+
 	ablkcipher_request_set_tfm(ctx->req, cc->tfms[key_index]);
 	ablkcipher_request_set_callback(ctx->req,
 	    CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
@@ -895,9 +959,16 @@ static void crypt_free_req(struct crypt_config *cc,
 			   struct ablkcipher_request *req, struct bio *base_bio)
 {
 	struct dm_crypt_io *io = dm_per_bio_data(base_bio, cc->per_bio_data_size);
+	struct dm_crypt_request *dmreq;
 
-	if ((struct ablkcipher_request *)(io + 1) != req)
+	if ((struct ablkcipher_request *)(io + 1) != req) {
+		dmreq = dmreq_of_req(cc, req);
+		if (dmreq->sg_in.sgl)
+			sg_free_table(&dmreq->sg_in);
+		if (dmreq->sg_out.sgl)
+			sg_free_table(&dmreq->sg_out);
 		mempool_free(req, cc->req_pool);
+	}
 }
 
 /*
@@ -907,6 +978,7 @@ static int crypt_convert(struct crypt_config *cc,
 			 struct convert_context *ctx)
 {
 	int r;
+	int block_size;
 
 	atomic_set(&ctx->cc_pending, 1);
 
@@ -916,6 +988,8 @@ static int crypt_convert(struct crypt_config *cc,
 
 		atomic_inc(&ctx->cc_pending);
 
+		block_size = ctx->iter_in.bi_size > ctx->block_size ?
+				ctx->block_size : ctx->iter_in.bi_size;
 		r = crypt_convert_block(cc, ctx, ctx->req);
 
 		switch (r) {
@@ -926,13 +1000,13 @@ static int crypt_convert(struct crypt_config *cc,
 			/* fall through*/
 		case -EINPROGRESS:
 			ctx->req = NULL;
-			ctx->cc_sector++;
+			ctx->cc_sector += block_size >> SECTOR_SHIFT;
 			continue;
 
 		/* sync */
 		case 0:
 			atomic_dec(&ctx->cc_pending);
-			ctx->cc_sector++;
+			ctx->cc_sector += block_size >> SECTOR_SHIFT;
 			cond_resched();
 			continue;
 
@@ -1615,6 +1689,7 @@ static int crypt_ctr_cipher(struct dm_target *ti,
 		cc->iv_gen_ops = &crypt_iv_benbi_ops;
 	else if (strcmp(ivmode, "null") == 0)
 		cc->iv_gen_ops = &crypt_iv_null_ops;
+#if 0
 	else if (strcmp(ivmode, "lmk") == 0) {
 		cc->iv_gen_ops = &crypt_iv_lmk_ops;
 		/*
@@ -1632,6 +1707,8 @@ static int crypt_ctr_cipher(struct dm_target *ti,
 		cc->key_parts += 2; /* IV + whitening */
 		cc->key_extra_size = cc->iv_size + TCW_WHITENING_SIZE;
 	} else {
+#endif
+	else {
 		ret = -EINVAL;
 		ti->error = "Invalid IV mode";
 		goto bad;
@@ -1662,6 +1739,17 @@ static int crypt_ctr_cipher(struct dm_target *ti,
 		}
 	}
 
+	/*
+	 * If the Crypto algorithm supports larger than sector size
+	 * operations, use them.
+	 */
+	if (!strcmp("xts-aes-talitos",
+		    crypto_tfm_alg_driver_name(crypto_ablkcipher_tfm(any_tfm(cc)))))
+		/* Talitos max data size */
+		cc->block_size = round_down(65535, PAGE_SIZE);
+	else
+		cc->block_size = 1 << SECTOR_SHIFT;
+
 	ret = 0;
 bad:
 	kfree(cipher_api);
@@ -1976,7 +2064,7 @@ static int crypt_iterate_devices(struct dm_target *ti,
 
 static struct target_type crypt_target = {
 	.name = "crypt",
-	.version = {1, 13, 0},
+	.version = {1, 14, 0},
 	.module = THIS_MODULE,
 	.ctr = crypt_ctr,
 	.dtr = crypt_dtr,
-- 
1.7.10.4

_______________________________________________
dm-crypt mailing list
dm-crypt@xxxxxxxx
http://www.saout.de/mailman/listinfo/dm-crypt