RE: [PATCH v2 1/1] crypto: caam/rng: Add support for PRNG

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

Thanks for the review.

Will incorporate all suggested changes in v3.

On i.MX8, the performance comes to ~42 MB/s.

Thanks,
Meenakshi
> -----Original Message-----
> From: Horia Geanta <horia.geanta@xxxxxxx>
> Sent: Tuesday, April 5, 2022 6:02 PM
> To: Meenakshi Aggarwal <meenakshi.aggarwal@xxxxxxx>; Pankaj Gupta
> <pankaj.gupta@xxxxxxx>; Gaurav Jain <gaurav.jain@xxxxxxx>; Varun Sethi
> <V.Sethi@xxxxxxx>; Herbert Xu <herbert@xxxxxxxxxxxxxxxxxxx>; David S .
> Miller <davem@xxxxxxxxxxxxx>
> Cc: linux-crypto@xxxxxxxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx; dl-linux-imx
> <linux-imx@xxxxxxx>; kernel test robot <lkp@xxxxxxxxx>
> Subject: Re: [PATCH v2 1/1] crypto: caam/rng: Add support for PRNG
> 
> On 3/16/2022 8:02 PM, Meenakshi Aggarwal wrote:
> > From: Meenakshi Aggarwal <meenakshi.aggarwal@xxxxxxx>
> >
> > Add support for random number generation using PRNG mode of CAAM and
> > expose the interface through crypto API.
> >
> According to the RM, the HW implementation of the DRBG follows NIST SP 800-
> 90A specification for DRBG_Hash SHA-256 function
> (https://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-90Ar1.pdf).
> This should be mentioned in the commit message at minimum.
> 
> > Reported-by: kernel test robot <lkp@xxxxxxxxx>
> This isn't required and doesn't make sense once you've squashed the fix.
> 
> > +/* prng per-device context */
> > +struct caam_prng_ctx {
> > +	struct device *jrdev;
> jrdev doesn't have to be saved in this struct; its lifetime is very limited.
> 
> > +	struct completion done;
> > +};
> > +
> > +struct caam_prng_alg {
> > +	struct rng_alg rng;
> > +	bool registered;
> > +};
> > +
> > +static void caam_prng_done(struct device *jrdev, u32 *desc, u32 err,
> > +			  void *context)
> > +{
> > +	struct caam_prng_ctx *jctx = context;
> > +
> > +	if (err)
> > +		caam_jr_strstatus(jrdev, err);
> The error returned by caam_jr_strstatus() should be propagated back to whoever
> initially enqueued the corresponding job.
> For this purpose, struct caam_prng_ctx could be extended with an "err" member.
> 
> > +
> > +	complete(&jctx->done);
> > +}
> > +
> 
> > +static int caam_prng_generate(struct crypto_rng *tfm,
> > +			     const u8 *src, unsigned int slen,
> > +			     u8 *dst, unsigned int dlen)
> > +{
> > +	struct caam_prng_ctx ctx;
> > +	dma_addr_t dst_dma;
> > +	u32 *desc;
> > +	u8 *buf;
> > +	int ret;
> > +
> > +	buf = kzalloc(dlen, GFP_KERNEL);
> > +	if (!buf)
> > +		return -ENOMEM;
> > +
> > +	ctx.jrdev = caam_jr_alloc();
> > +	ret = PTR_ERR_OR_ZERO(ctx.jrdev);
> > +	if (ret) {
> > +		pr_err("Job Ring Device allocation failed\n");
> > +		kfree(buf);
> > +		return ret;
> > +	}
> > +
> > +	desc = kzalloc(CAAM_PRNG_DESC_LEN, GFP_KERNEL | GFP_DMA);
> > +	if (!desc) {
> > +		caam_jr_free(ctx.jrdev);
> > +		kfree(buf);
> > +		return -ENOMEM;
> Please fix the error handling to reuse the free code at the end of the function.
> 
> > +	}
> > +
> > +	dst_dma = dma_map_single(ctx.jrdev, buf, dlen, DMA_FROM_DEVICE);
> > +	if (dma_mapping_error(ctx.jrdev, dst_dma)) {
> > +		dev_err(ctx.jrdev, "Failed to map destination buffer memory\n");
> > +		ret = -ENOMEM;
> > +		goto out;
> > +	}
> > +
> > +	init_completion(&ctx.done);
> > +	ret = caam_jr_enqueue(ctx.jrdev,
> > +			      caam_init_prng_desc(desc, dst_dma, dlen),
> > +			      caam_prng_done, &ctx);
> > +
> > +	if (ret == -EINPROGRESS) {
> > +		wait_for_completion(&ctx.done);
> > +		ret = 0;
> > +	}
> > +
> > +	dma_unmap_single(ctx.jrdev, dst_dma, dlen, DMA_FROM_DEVICE);
> > +
> > +	memcpy(dst, buf, dlen);
> I am a bit worried wrt. performance, considering the memory allocations and
> the memcpy on the hotpath.
> 
> Previous version of CAAM PRNG driver was getting ~ 200 MB/s on LS and 50
> MB/s on i.MX8.
> 
> How does the current version compare?
> Given that there's no prefetch buffer and there are memory allocation and copy
> operations on the hotpath, I'd expect a hefty penalty.
> 
> > +out:
> > +	caam_jr_free(ctx.jrdev);
> > +	kfree(desc);
> > +	kfree(buf);
> > +	return ret;
> > +}
> > +
> > +static void caam_prng_exit(struct crypto_tfm *tfm) {}
> > +
> > +static int caam_prng_init(struct crypto_tfm *tfm) {
> > +	return 0;
> > +}
> > +
> > +static int caam_prng_seed(struct crypto_rng *tfm,
> > +			 const u8 *seed, unsigned int slen) {
> > +	struct caam_prng_ctx ctx;
> > +	dma_addr_t seed_dma;
> > +	u32 *desc;
> > +	u8 *buf;
> > +	int ret = 0;
> > +
> > +	if (seed == NULL) {
> > +		pr_err("Seed not provided\n");
> > +		return ret;
> > +	}
> > +
> > +	buf = kzalloc(slen, GFP_KERNEL);
> > +	if (!buf)
> > +		return -ENOMEM;
> > +
> > +	ctx.jrdev = caam_jr_alloc();
> > +	ret = PTR_ERR_OR_ZERO(ctx.jrdev);
> > +	if (ret) {
> > +		pr_err("Job Ring Device allocation failed\n");
> > +		kfree(buf);
> > +		return ret;
> > +	}
> > +
> > +	desc = kzalloc(CAAM_PRNG_DESC_LEN, GFP_KERNEL | GFP_DMA);
> > +	if (!desc) {
> > +		caam_jr_free(ctx.jrdev);
> > +		kfree(buf);
> > +		return -ENOMEM;
> Same here, error handling at the end of the function should be reused.
> 
> > +	}
> > +
> > +	memcpy(buf, seed, slen);
> > +
> > +	seed_dma = dma_map_single(ctx.jrdev, buf, slen, DMA_FROM_DEVICE);
> > +	if (dma_mapping_error(ctx.jrdev, seed_dma)) {
> > +		dev_err(ctx.jrdev, "Failed to map seed buffer memory\n");
> > +		ret = -ENOMEM;
> > +		goto out;
> > +	}
> > +
> > +	init_completion(&ctx.done);
> > +	ret = caam_jr_enqueue(ctx.jrdev,
> > +			      caam_init_reseed_desc(desc, seed_dma, slen),
> > +			      caam_prng_done, &ctx);
> > +
> > +	if (ret == -EINPROGRESS) {
> > +		wait_for_completion(&ctx.done);
> > +		ret = 0;
> > +	}
> > +
> > +	dma_unmap_single(ctx.jrdev, seed_dma, slen, DMA_FROM_DEVICE);
> > +
> > +out:
> > +	caam_jr_free(ctx.jrdev);
> > +	kfree(desc);
> > +	kfree(buf);
> > +	return ret;
> > +}
> > +
> > +static struct caam_prng_alg caam_prng_alg = {
> > +	.rng = {
> > +		.generate = caam_prng_generate,
> > +		.seed = caam_prng_seed,
> > +		.seedsize = 32,
> seedsize should be set to 0, HW does not need an externally-provided seed since
> it fetches it internally from TRNG.
> 
> > +int caam_prng_register(struct device *ctrldev) {
> > +	struct caam_drv_private *priv = dev_get_drvdata(ctrldev);
> > +	u32 rng_inst;
> > +	int ret = 0;
> > +
> > +	/* Check for available RNG blocks before registration */
> > +	if (priv->era < 10)
> > +		rng_inst = (rd_reg32(&priv->jr[0]->perfmon.cha_num_ls) &
> > +			    CHA_ID_LS_RNG_MASK) >> CHA_ID_LS_RNG_SHIFT;
> > +	else
> > +		rng_inst = rd_reg32(&priv->jr[0]->vreg.rng) &
> CHA_VER_NUM_MASK;
> > +
> > +	if (!rng_inst) {
> > +		dev_dbg(ctrldev, "RNG block is not available... skipping
> registering algorithm\n");
> > +		return ret;
> > +	}
> > +
> > +	ret = crypto_register_rng(&caam_prng_alg.rng);
> > +	if (ret) {
> > +		dev_err(ctrldev,
> > +			"couldn't register rng crypto alg: %d\n",
> > +			ret);
> > +		return ret;
> > +	}
> > +
> > +	caam_prng_alg.registered = true;
> > +
> > +	dev_info(ctrldev,
> > +		 "rng crypto API alg registered %s\n",
> > +caam_prng_alg.rng.base.cra_name);
> driver_name should be printed, it's more specific / unique.
> 
> > diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index
> > 7f2b1101f567..11849362f912 100644
> > --- a/drivers/crypto/caam/jr.c
> > +++ b/drivers/crypto/caam/jr.c
> > @@ -39,6 +39,7 @@ static void register_algs(struct caam_drv_private_jr
> *jrpriv,
> >  	caam_algapi_hash_init(dev);
> >  	caam_pkc_init(dev);
> >  	jrpriv->hwrng = !caam_rng_init(dev);
> > +	caam_prng_register(dev);
> >  	caam_qi_algapi_init(dev);
> >
> >  algs_unlock:
> > @@ -56,6 +57,7 @@ static void unregister_algs(void)
> >
> >  	caam_pkc_exit();
> >  	caam_algapi_hash_exit();
> > +	caam_prng_unregister(NULL);
> Unregistering order should be the reverse order of registering.
> 
> Horia




[Index of Archives]     [Kernel]     [Gnu Classpath]     [Gnu Crypto]     [DM Crypt]     [Netfilter]     [Bugtraq]

  Powered by Linux