This patch permit to batch request. This imply: - linking two task via next - set interrupt flag just before running the batch in the last task. - storing all requests for finalizing them later Signed-off-by: Corentin Labbe <clabbe.montjoie@xxxxxxxxx> --- .../allwinner/sun8i-ce/sun8i-ce-cipher.c | 60 +++++++++++++++---- .../crypto/allwinner/sun8i-ce/sun8i-ce-core.c | 15 +++-- drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h | 6 ++ 3 files changed, 66 insertions(+), 15 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c index fc0a2299c701..832fb4a51da9 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c @@ -96,31 +96,38 @@ static int sun8i_ce_cipher_prepare(struct crypto_engine *engine, void *async_req int flow, i; int nr_sgs = 0; int nr_sgd = 0; + int slot = 0; int err = 0; algt = container_of(alg, struct sun8i_ce_alg_template, alg.skcipher); - dev_dbg(ce->dev, "%s %s %u %x IV(%p %u) key=%u\n", __func__, + dev_dbg(ce->dev, "%s %s %u %x IV(%p %u) key=%u slot=%d\n", __func__, crypto_tfm_alg_name(areq->base.tfm), areq->cryptlen, rctx->op_dir, areq->iv, crypto_skcipher_ivsize(tfm), - op->keylen); - -#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG - algt->stat_req++; -#endif + op->keylen, slot); flow = rctx->flow; chan = &ce->chanlist[flow]; + slot = chan->ct; + +#ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG + algt->stat_req++; + if (chan->ct + 1 > chan->tmax) + chan->tmax = chan->ct + 1; +#endif - cet = chan->tl; + cet = &chan->tl[slot]; memset(cet, 0, sizeof(struct ce_task)); cet->t_id = cpu_to_le32(flow); common = ce->variant->alg_cipher[algt->ce_algo_id]; - common |= rctx->op_dir | CE_COMM_INT; + common |= rctx->op_dir; cet->t_common_ctl = cpu_to_le32(common); + if (slot > 0) + chan->tl[slot - 1].next = cpu_to_le32(chan->t_phy + 176 * slot); + /* CTS and recent CE (H6) need length in bytes, in word otherwise */ if (ce->variant->has_t_dlen_in_bytes) cet->t_dlen = cpu_to_le32(areq->cryptlen); @@ -240,6 +247,9 @@ static int sun8i_ce_cipher_prepare(struct crypto_engine *engine, void *async_req chan->timeout = areq->cryptlen; rctx->nr_sgs = nr_sgs; rctx->nr_sgd = nr_sgd; + rctx->slot = slot; + chan->lreq[chan->ct] = &areq->base; + chan->ct++; return 0; theend_sgs: @@ -281,14 +291,41 @@ int sun8i_ce_cipher_run(struct crypto_engine *engine, void *areq) struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm); struct sun8i_ce_dev *ce = op->ce; struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(breq); + struct sun8i_ce_flow *chan; int flow, err; + int i; flow = rctx->flow; + chan = &ce->chanlist[flow]; err = sun8i_ce_run_task(ce, flow, crypto_tfm_alg_name(breq->base.tfm)); - crypto_finalize_skcipher_request(engine, breq, err); + for (i = 0; i < chan->ct; i++) { + if (!chan->lreq[i]) { + dev_err(ce->dev, "Missing request at slot %d\n", i); + continue; + } + breq = container_of(chan->lreq[i], struct skcipher_request, base); + crypto_finalize_skcipher_request(engine, breq, err); + chan->lreq[i] = NULL; + } + chan->ct = 0; return 0; } +static int sun8i_ce_qmore(struct crypto_engine *engine, void *async_req) +{ + struct skcipher_request *areq = container_of(async_req, struct skcipher_request, base); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); + struct sun8i_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm); + struct sun8i_ce_dev *ce = op->ce; + struct sun8i_cipher_req_ctx *rctx = skcipher_request_ctx(areq); + struct sun8i_ce_flow *chan; + int flow; + + flow = rctx->flow; + chan = &ce->chanlist[flow]; + return MAXTASK - chan->ct; +} + static int sun8i_ce_cipher_unprepare(struct crypto_engine *engine, void *async_req) { struct skcipher_request *areq = container_of(async_req, struct skcipher_request, base); @@ -301,11 +338,13 @@ static int sun8i_ce_cipher_unprepare(struct crypto_engine *engine, void *async_r unsigned int ivsize, offset; int nr_sgs = rctx->nr_sgs; int nr_sgd = rctx->nr_sgd; + int slot = rctx->slot; int flow; flow = rctx->flow; chan = &ce->chanlist[flow]; - cet = chan->tl; + + cet = &chan->tl[slot]; ivsize = crypto_skcipher_ivsize(tfm); if (areq->src == areq->dst) { @@ -404,6 +443,7 @@ int sun8i_ce_cipher_init(struct crypto_tfm *tfm) op->enginectx.op.do_one_request = sun8i_ce_cipher_run; op->enginectx.op.prepare_request = sun8i_ce_cipher_prepare; op->enginectx.op.unprepare_request = sun8i_ce_cipher_unprepare; + op->enginectx.op.can_queue_more = sun8i_ce_qmore; err = pm_runtime_get_sync(op->ce->dev); if (err < 0) diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c index e8bf7bf31061..348d3927344b 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-core.c @@ -104,8 +104,10 @@ int sun8i_ce_run_task(struct sun8i_ce_dev *ce, int flow, const char *name) int err = 0; #ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG - ce->chanlist[flow].stat_req++; + ce->chanlist[flow].stat_req += ce->chanlist[flow].ct; #endif + /* mark last one */ + ce->chanlist[flow].tl[ce->chanlist[flow].ct - 1].t_common_ctl |= cpu_to_le32(CE_COMM_INT); mutex_lock(&ce->mlock); @@ -120,7 +122,7 @@ int sun8i_ce_run_task(struct sun8i_ce_dev *ce, int flow, const char *name) /* Be sure all data is written before enabling the task */ wmb(); - v = 1 | (ce->chanlist[flow].tl->t_common_ctl & 0x7F) << 8; + v = 1 | (ce->chanlist[flow].tl[0].t_common_ctl & 0x7F) << 8; writel(v, ce->base + CE_TLR); mutex_unlock(&ce->mlock); @@ -128,7 +130,7 @@ int sun8i_ce_run_task(struct sun8i_ce_dev *ce, int flow, const char *name) msecs_to_jiffies(ce->chanlist[flow].timeout)); if (ce->chanlist[flow].status == 0) { - dev_err(ce->dev, "DMA timeout for %s\n", name); + dev_err(ce->dev, "DMA timeout for %s on flow %d (batch=%d)\n", name, flow, ce->chanlist[flow].ct); err = -EFAULT; } /* No need to lock for this read, the channel is locked so @@ -285,7 +287,10 @@ static int sun8i_ce_dbgfs_read(struct seq_file *seq, void *v) int i; for (i = 0; i < MAXFLOW; i++) - seq_printf(seq, "Channel %d: nreq %lu\n", i, ce->chanlist[i].stat_req); + seq_printf(seq, "Channel %d: nreq %lu tmax %d eqlen=%d/%d\n", i, + ce->chanlist[i].stat_req, ce->chanlist[i].tmax, + ce->chanlist[i].engine->queue.qlen, + ce->chanlist[i].engine->queue.max_qlen); for (i = 0; i < ARRAY_SIZE(ce_algs); i++) { if (!ce_algs[i].ce) @@ -343,7 +348,7 @@ static int sun8i_ce_allocate_chanlist(struct sun8i_ce_dev *ce) for (i = 0; i < MAXFLOW; i++) { init_completion(&ce->chanlist[i].complete); - ce->chanlist[i].engine = crypto_engine_alloc_init(ce->dev, true); + ce->chanlist[i].engine = crypto_engine_alloc_init2(ce->dev, true, MAXTASK * 2); if (!ce->chanlist[i].engine) { dev_err(ce->dev, "Cannot allocate engine\n"); i--; diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h index 2d3325a13bf1..59e9985fc6c8 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h @@ -135,6 +135,7 @@ struct ce_task { * @t_phy: Physical address of task * @tl: pointer to the current ce_task for this flow * @stat_req: number of request done by this flow + * @tmax: The maximum number of tasks done in one batch */ struct sun8i_ce_flow { struct crypto_engine *engine; @@ -143,8 +144,11 @@ struct sun8i_ce_flow { dma_addr_t t_phy; int timeout; struct ce_task *tl; + struct crypto_async_request *lreq[MAXTASK]; + int ct; #ifdef CONFIG_CRYPTO_DEV_SUN8I_CE_DEBUG unsigned long stat_req; + int tmax; #endif }; @@ -185,6 +189,7 @@ struct sun8i_ce_dev { * @ivlen: size of bounce_iv * @nr_sgs: The number of source SG (as given by dma_map_sg()) * @nr_sgd: The number of destination SG (as given by dma_map_sg()) + * @slot: The slot in the tasklist used for this requests */ struct sun8i_cipher_req_ctx { u32 op_dir; @@ -194,6 +199,7 @@ struct sun8i_cipher_req_ctx { unsigned int ivlen; int nr_sgs; int nr_sgd; + int slot; }; /* -- 2.24.1