HI, Thanks for the review. Will incorporate all suggested changes in v3. On i.MX8, performance is coming ~ 42 MB/s Thanks, Meenakshi > -----Original Message----- > From: Horia Geanta <horia.geanta@xxxxxxx> > Sent: Tuesday, April 5, 2022 6:02 PM > To: Meenakshi Aggarwal <meenakshi.aggarwal@xxxxxxx>; Pankaj Gupta > <pankaj.gupta@xxxxxxx>; Gaurav Jain <gaurav.jain@xxxxxxx>; Varun Sethi > <V.Sethi@xxxxxxx>; Herbert Xu <herbert@xxxxxxxxxxxxxxxxxxx>; David S . > Miller <davem@xxxxxxxxxxxxx> > Cc: linux-crypto@xxxxxxxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx; dl-linux-imx > <linux-imx@xxxxxxx>; kernel test robot <lkp@xxxxxxxxx> > Subject: Re: [PATCH v2 1/1] crypto: caam/rng: Add support for PRNG > > On 3/16/2022 8:02 PM, Meenakshi Aggarwal wrote: > > From: Meenakshi Aggarwal <meenakshi.aggarwal@xxxxxxx> > > > > Add support for random number generation using PRNG mode of CAAM and > > expose the interface through crypto API. > > > According to the RM, the HW implementation of the DRBG follows NIST SP 800- > 90A specification for DRBG_Hash SHA-256 function > (https://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-90Ar1.pdf). > This should be mentioned in the commit message at minimum. > > > Reported-by: kernel test robot <lkp@xxxxxxxxx> > This isn't required and doesn't make sense once you've squashed the fix. > > > +/* prng per-device context */ > > +struct caam_prng_ctx { > > + struct device *jrdev; > jrdev doesn't have to be saved in this struct, it's lifetime is very limited. > > > + struct completion done; > > +}; > > + > > +struct caam_prng_alg { > > + struct rng_alg rng; > > + bool registered; > > +}; > > + > > +static void caam_prng_done(struct device *jrdev, u32 *desc, u32 err, > > + void *context) > > +{ > > + struct caam_prng_ctx *jctx = context; > > + > > + if (err) > > + caam_jr_strstatus(jrdev, err); > The error returned by caam_jr_strstatus() should be propagated back to who > initially enqueued the corresponding. > For this purpose, struct caam_prng_ctx could be extended with an "err" member. > > > + > > + complete(&jctx->done); > > +} > > + > > > +static int caam_prng_generate(struct crypto_rng *tfm, > > + const u8 *src, unsigned int slen, > > + u8 *dst, unsigned int dlen) > > +{ > > + struct caam_prng_ctx ctx; > > + dma_addr_t dst_dma; > > + u32 *desc; > > + u8 *buf; > > + int ret; > > + > > + buf = kzalloc(dlen, GFP_KERNEL); > > + if (!buf) > > + return -ENOMEM; > > + > > + ctx.jrdev = caam_jr_alloc(); > > + ret = PTR_ERR_OR_ZERO(ctx.jrdev); > > + if (ret) { > > + pr_err("Job Ring Device allocation failed\n"); > > + kfree(buf); > > + return ret; > > + } > > + > > + desc = kzalloc(CAAM_PRNG_DESC_LEN, GFP_KERNEL | GFP_DMA); > > + if (!desc) { > > + caam_jr_free(ctx.jrdev); > > + kfree(buf); > > + return -ENOMEM; > Please fix the error handling to reuse the free code at the end of the function. > > > + } > > + > > + dst_dma = dma_map_single(ctx.jrdev, buf, dlen, DMA_FROM_DEVICE); > > + if (dma_mapping_error(ctx.jrdev, dst_dma)) { > > + dev_err(ctx.jrdev, "Failed to map destination buffer memory\n"); > > + ret = -ENOMEM; > > + goto out; > > + } > > + > > + init_completion(&ctx.done); > > + ret = caam_jr_enqueue(ctx.jrdev, > > + caam_init_prng_desc(desc, dst_dma, dlen), > > + caam_prng_done, &ctx); > > + > > + if (ret == -EINPROGRESS) { > > + wait_for_completion(&ctx.done); > > + ret = 0; > > + } > > + > > + dma_unmap_single(ctx.jrdev, dst_dma, dlen, DMA_FROM_DEVICE); > > + > > + memcpy(dst, buf, dlen); > I am a bit worried wrt. performance, considering the memory allocations and > the memcpy on the hotpath. > > Previous version of CAAM PRNG driver was getting ~ 200 MB/s on LS and 50 > MB/s on i.MX8. > > How does the current version compare? > Given that there's no prefetch buffer and there are memory allocation, copy > operations on the hotpath, I'd expect a hefty penalty. > > > +out: > > + caam_jr_free(ctx.jrdev); > > + kfree(desc); > > + kfree(buf); > > + return ret; > > +} > > + > > +static void caam_prng_exit(struct crypto_tfm *tfm) {} > > + > > +static int caam_prng_init(struct crypto_tfm *tfm) { > > + return 0; > > +} > > + > > +static int caam_prng_seed(struct crypto_rng *tfm, > > + const u8 *seed, unsigned int slen) { > > + struct caam_prng_ctx ctx; > > + dma_addr_t seed_dma; > > + u32 *desc; > > + u8 *buf; > > + int ret = 0; > > + > > + if (seed == NULL) { > > + pr_err("Seed not provided\n"); > > + return ret; > > + } > > + > > + buf = kzalloc(slen, GFP_KERNEL); > > + if (!buf) > > + return -ENOMEM; > > + > > + ctx.jrdev = caam_jr_alloc(); > > + ret = PTR_ERR_OR_ZERO(ctx.jrdev); > > + if (ret) { > > + pr_err("Job Ring Device allocation failed\n"); > > + kfree(buf); > > + return ret; > > + } > > + > > + desc = kzalloc(CAAM_PRNG_DESC_LEN, GFP_KERNEL | GFP_DMA); > > + if (!desc) { > > + caam_jr_free(ctx.jrdev); > > + kfree(buf); > > + return -ENOMEM; > Same here, error handling at the end of the function should be reused. > > > + } > > + > > + memcpy(buf, seed, slen); > > + > > + seed_dma = dma_map_single(ctx.jrdev, buf, slen, DMA_FROM_DEVICE); > > + if (dma_mapping_error(ctx.jrdev, seed_dma)) { > > + dev_err(ctx.jrdev, "Failed to map seed buffer memory\n"); > > + ret = -ENOMEM; > > + goto out; > > + } > > + > > + init_completion(&ctx.done); > > + ret = caam_jr_enqueue(ctx.jrdev, > > + caam_init_reseed_desc(desc, seed_dma, slen), > > + caam_prng_done, &ctx); > > + > > + if (ret == -EINPROGRESS) { > > + wait_for_completion(&ctx.done); > > + ret = 0; > > + } > > + > > + dma_unmap_single(ctx.jrdev, seed_dma, slen, DMA_FROM_DEVICE); > > + > > +out: > > + caam_jr_free(ctx.jrdev); > > + kfree(desc); > > + kfree(buf); > > + return ret; > > +} > > + > > +static struct caam_prng_alg caam_prng_alg = { > > + .rng = { > > + .generate = caam_prng_generate, > > + .seed = caam_prng_seed, > > + .seedsize = 32, > seedsize should be set to 0, HW does not need an externally-provided seed since > it fetches it internally from TRNG. > > > +int caam_prng_register(struct device *ctrldev) { > > + struct caam_drv_private *priv = dev_get_drvdata(ctrldev); > > + u32 rng_inst; > > + int ret = 0; > > + > > + /* Check for available RNG blocks before registration */ > > + if (priv->era < 10) > > + rng_inst = (rd_reg32(&priv->jr[0]->perfmon.cha_num_ls) & > > + CHA_ID_LS_RNG_MASK) >> CHA_ID_LS_RNG_SHIFT; > > + else > > + rng_inst = rd_reg32(&priv->jr[0]->vreg.rng) & > CHA_VER_NUM_MASK; > > + > > + if (!rng_inst) { > > + dev_dbg(ctrldev, "RNG block is not available... skipping > registering algorithm\n"); > > + return ret; > > + } > > + > > + ret = crypto_register_rng(&caam_prng_alg.rng); > > + if (ret) { > > + dev_err(ctrldev, > > + "couldn't register rng crypto alg: %d\n", > > + ret); > > + return ret; > > + } > > + > > + caam_prng_alg.registered = true; > > + > > + dev_info(ctrldev, > > + "rng crypto API alg registered %s\n", > > +caam_prng_alg.rng.base.cra_name); > driver_name should be printed, it's more specific / unique. > > > diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index > > 7f2b1101f567..11849362f912 100644 > > --- a/drivers/crypto/caam/jr.c > > +++ b/drivers/crypto/caam/jr.c > > @@ -39,6 +39,7 @@ static void register_algs(struct caam_drv_private_jr > *jrpriv, > > caam_algapi_hash_init(dev); > > caam_pkc_init(dev); > > jrpriv->hwrng = !caam_rng_init(dev); > > + caam_prng_register(dev); > > caam_qi_algapi_init(dev); > > > > algs_unlock: > > @@ -56,6 +57,7 @@ static void unregister_algs(void) > > > > caam_pkc_exit(); > > caam_algapi_hash_exit(); > > + caam_prng_unregister(NULL); > Unregistering order should be the reverse order of registering. > > Horia