RE: [v2 PATCH 3/7] crypto: acomp - Add request chaining and virtual addresses

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



> -----Original Message-----
> From: Herbert Xu <herbert@xxxxxxxxxxxxxxxxxxx>
> Sent: Tuesday, March 4, 2025 1:25 AM
> To: Linux Crypto Mailing List <linux-crypto@xxxxxxxxxxxxxxx>
> Cc: linux-mm@xxxxxxxxx; Yosry Ahmed <yosry.ahmed@xxxxxxxxx>; Sridhar,
> Kanchana P <kanchana.p.sridhar@xxxxxxxxx>
> Subject: [v2 PATCH 3/7] crypto: acomp - Add request chaining and virtual
> addresses
> 
> This adds request chaining and virtual address support to the
> acomp interface.
> 
> It is identical to the ahash interface, except that a new flag
> CRYPTO_ACOMP_REQ_NONDMA has been added to indicate that the
> virtual addresses are not suitable for DMA.  This is because
> all existing and potential acomp users can provide memory that
> is suitable for DMA so there is no need for a fall-back copy
> path.
> 
> Signed-off-by: Herbert Xu <herbert@xxxxxxxxxxxxxxxxxxx>
> ---
>  crypto/acompress.c                  | 201 ++++++++++++++++++++++++++++
>  include/crypto/acompress.h          |  89 ++++++++++--
>  include/crypto/internal/acompress.h |  22 +++
>  3 files changed, 299 insertions(+), 13 deletions(-)
> 
> diff --git a/crypto/acompress.c b/crypto/acompress.c
> index 30176316140a..d2103d4e42cc 100644
> --- a/crypto/acompress.c
> +++ b/crypto/acompress.c
> @@ -23,6 +23,8 @@ struct crypto_scomp;
> 
>  static const struct crypto_type crypto_acomp_type;
> 
> +static void acomp_reqchain_done(void *data, int err);
> +
>  static inline struct acomp_alg *__crypto_acomp_alg(struct crypto_alg *alg)
>  {
>  	return container_of(alg, struct acomp_alg, calg.base);
> @@ -153,6 +155,205 @@ void acomp_request_free(struct acomp_req *req)
>  }
>  EXPORT_SYMBOL_GPL(acomp_request_free);
> 
> +static bool acomp_request_has_nondma(struct acomp_req *req)
> +{
> +	struct acomp_req *r2;
> +
> +	if (acomp_request_isnondma(req))
> +		return true;
> +
> +	list_for_each_entry(r2, &req->base.list, base.list)
> +		if (acomp_request_isnondma(r2))
> +			return true;
> +
> +	return false;
> +}
> +
> +static void acomp_save_req(struct acomp_req *req, crypto_completion_t
> cplt)
> +{
> +	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
> +	struct acomp_req_chain *state = &req->chain;
> +
> +	if (!acomp_is_async(tfm))
> +		return;
> +
> +	state->compl = req->base.complete;
> +	state->data = req->base.data;
> +	req->base.complete = cplt;
> +	req->base.data = state;
> +	state->req0 = req;
> +}
> +
> +static void acomp_restore_req(struct acomp_req_chain *state)
> +{
> +	struct acomp_req *req = state->req0;
> +	struct crypto_acomp *tfm;
> +
> +	tfm = crypto_acomp_reqtfm(req);
> +	if (!acomp_is_async(tfm))
> +		return;
> +
> +	req->base.complete = state->compl;
> +	req->base.data = state->data;
> +}
> +
> +static void acomp_reqchain_virt(struct acomp_req_chain *state, int err)
> +{
> +	struct acomp_req *req = state->cur;
> +	unsigned int slen = req->slen;
> +	unsigned int dlen = req->dlen;
> +
> +	req->base.err = err;
> +	if (!state->src)
> +		return;
> +
> +	acomp_request_set_virt(req, state->src, state->dst, slen, dlen);
> +	state->src = NULL;
> +}
> +
> +static int acomp_reqchain_finish(struct acomp_req_chain *state,
> +				 int err, u32 mask)
> +{
> +	struct acomp_req *req0 = state->req0;
> +	struct acomp_req *req = state->cur;
> +	struct acomp_req *n;
> +
> +	acomp_reqchain_virt(state, err);

Unless I am missing something, this seems to be future-proofing, based
on the initial checks you've implemented in acomp_do_req_chain().

> +
> +	if (req != req0)
> +		list_add_tail(&req->base.list, &req0->base.list);
> +
> +	list_for_each_entry_safe(req, n, &state->head, base.list) {
> +		list_del_init(&req->base.list);
> +
> +		req->base.flags &= mask;
> +		req->base.complete = acomp_reqchain_done;
> +		req->base.data = state;
> +		state->cur = req;
> +
> +		if (acomp_request_isvirt(req)) {
> +			unsigned int slen = req->slen;
> +			unsigned int dlen = req->dlen;
> +			const u8 *svirt = req->svirt;
> +			u8 *dvirt = req->dvirt;
> +
> +			state->src = svirt;
> +			state->dst = dvirt;
> +
> +			sg_init_one(&state->ssg, svirt, slen);
> +			sg_init_one(&state->dsg, dvirt, dlen);
> +
> +			acomp_request_set_params(req, &state->ssg,
> &state->dsg,
> +						 slen, dlen);
> +		}
> +
> +		err = state->op(req);
> +
> +		if (err == -EINPROGRESS) {
> +			if (!list_empty(&state->head))
> +				err = -EBUSY;
> +			goto out;
> +		}
> +
> +		if (err == -EBUSY)
> +			goto out;

This is a fully synchronous way of processing the request chain, and
will not work for iaa_crypto's submit-then-poll-for-completions paradigm,
which is essential for us to process the compressions in parallel in hardware.
Without parallelism, we will not derive the full benefits of IAA.

Would you be willing to incorporate the acomp_do_async_req_chain()
that I have implemented in v8 of my patch-series [1], to enable the iaa_crypto
driver's async way of processing the request chain to get the parallelism,
and/or adapt your implementation to enable this?

Better still, if you agree that the virtual address support is entirely future-proofing,
I would like to ask you to consider reviewing and improving my well-validated
implementation of request chaining in [1], with the goal of merging it in with
parallel/series support for the reqchain, and introducing virtual address support
at a later time.

[1] https://patchwork.kernel.org/project/linux-mm/patch/20250303084724.6490-2-kanchana.p.sridhar@xxxxxxxxx/


> +
> +		acomp_reqchain_virt(state, err);

Is this really needed? From what I can understand, the important thing this
call does for the implementation is to set req->base.err. It seems like
compute overhead (which matters for kernel users like zswap) just for setting
the request's error status.

In general, the calls to virtual address support are a bit confusing, since you
check right upfront in acomp_do_req_chain()
"if (acomp_request_has_nondma(req)) return -EINVAL".

IMO, it appears that this is all we need until there are in-kernel users that
require the virtual address future-proofing. Please correct me if I am missing
something significant.

Also, is my understanding correct that the zswap code that sets up the SG lists
for compress/decompress is not impacted by this?


> +		list_add_tail(&req->base.list, &req0->base.list);
> +	}
> +
> +	acomp_restore_req(state);
> +
> +out:
> +	return err;
> +}
> +
> +static void acomp_reqchain_done(void *data, int err)
> +{
> +	struct acomp_req_chain *state = data;
> +	crypto_completion_t compl = state->compl;
> +
> +	data = state->data;
> +
> +	if (err == -EINPROGRESS) {
> +		if (!list_empty(&state->head))
> +			return;
> +		goto notify;
> +	}
> +
> +	err = acomp_reqchain_finish(state, err,
> CRYPTO_TFM_REQ_MAY_BACKLOG);
> +	if (err == -EBUSY)
> +		return;
> +
> +notify:
> +	compl(data, err);
> +}
> +
> +static int acomp_do_req_chain(struct acomp_req *req,
> +			      int (*op)(struct acomp_req *req))
> +{
> +	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
> +	struct acomp_req_chain *state = &req->chain;
> +	int err;
> +
> +	if (crypto_acomp_req_chain(tfm) ||
> +	    (!acomp_request_chained(req) && !acomp_request_isvirt(req)))
> +		return op(req);

Isn't this a bug? If an algorithm opts in and sets CRYPTO_ALG_REQ_CHAIN
in its cra_flags, the above condition will always be true, the "op" will be
called on the first request, and this function will return. Am I missing something?

> +
> +	/*
> +	 * There are no in-kernel users that do this.  If and ever
> +	 * such users come into being then we could add a fall-back
> +	 * path.
> +	 */
> +	if (acomp_request_has_nondma(req))
> +		return -EINVAL;

As mentioned earlier, is this sufficient for now, and is the virtual address
support really future-proofing?

> +
> +	if (acomp_is_async(tfm)) {
> +		acomp_save_req(req, acomp_reqchain_done);
> +		state = req->base.data;
> +	}
> +
> +	state->op = op;
> +	state->cur = req;
> +	state->src = NULL;
> +	INIT_LIST_HEAD(&state->head);
> +	list_splice_init(&req->base.list, &state->head);
> +
> +	if (acomp_request_isvirt(req)) {

Based on the above check for acomp_request_has_nondma(), it should never
get here, IIUC?

In general, can you shed some light on how you envision the zswap code
changing based on this patchset?

Thanks,
Kanchana

> +		unsigned int slen = req->slen;
> +		unsigned int dlen = req->dlen;
> +		const u8 *svirt = req->svirt;
> +		u8 *dvirt = req->dvirt;
> +
> +		state->src = svirt;
> +		state->dst = dvirt;
> +
> +		sg_init_one(&state->ssg, svirt, slen);
> +		sg_init_one(&state->dsg, dvirt, dlen);
> +
> +		acomp_request_set_params(req, &state->ssg, &state->dsg,
> +					 slen, dlen);
> +	}
> +
> +	err = op(req);
> +	if (err == -EBUSY || err == -EINPROGRESS)
> +		return -EBUSY;
> +
> +	return acomp_reqchain_finish(state, err, ~0);
> +}
> +
> +int crypto_acomp_compress(struct acomp_req *req)
> +{
> +	return acomp_do_req_chain(req, crypto_acomp_reqtfm(req)-
> >compress);
> +}
> +EXPORT_SYMBOL_GPL(crypto_acomp_compress);
> +
> +int crypto_acomp_decompress(struct acomp_req *req)
> +{
> +	return acomp_do_req_chain(req, crypto_acomp_reqtfm(req)-
> >decompress);
> +}
> +EXPORT_SYMBOL_GPL(crypto_acomp_decompress);
> +
>  void comp_prepare_alg(struct comp_alg_common *alg)
>  {
>  	struct crypto_alg *base = &alg->base;
> diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h
> index b6d5136e689d..15bb13e47f8b 100644
> --- a/include/crypto/acompress.h
> +++ b/include/crypto/acompress.h
> @@ -12,10 +12,34 @@
>  #include <linux/atomic.h>
>  #include <linux/container_of.h>
>  #include <linux/crypto.h>
> +#include <linux/scatterlist.h>
> +#include <linux/types.h>
> 
>  #define CRYPTO_ACOMP_ALLOC_OUTPUT	0x00000001
> +
> +/* Set this bit for virtual address instead of SG list. */
> +#define CRYPTO_ACOMP_REQ_VIRT		0x00000002
> +
> +/* Set this bit for if virtual address buffer cannot be used for DMA. */
> +#define CRYPTO_ACOMP_REQ_NONDMA		0x00000004
> +
>  #define CRYPTO_ACOMP_DST_MAX		131072
> 
> +struct acomp_req;
> +
> +struct acomp_req_chain {
> +	struct list_head head;
> +	struct acomp_req *req0;
> +	struct acomp_req *cur;
> +	int (*op)(struct acomp_req *req);
> +	crypto_completion_t compl;
> +	void *data;
> +	struct scatterlist ssg;
> +	struct scatterlist dsg;
> +	const u8 *src;
> +	u8 *dst;
> +};
> +
>  /**
>   * struct acomp_req - asynchronous (de)compression request
>   *
> @@ -24,14 +48,24 @@
>   * @dst:	Destination data
>   * @slen:	Size of the input buffer
>   * @dlen:	Size of the output buffer and number of bytes produced
> + * @chain:	Private API code data, do not use
>   * @__ctx:	Start of private context data
>   */
>  struct acomp_req {
>  	struct crypto_async_request base;
> -	struct scatterlist *src;
> -	struct scatterlist *dst;
> +	union {
> +		struct scatterlist *src;
> +		const u8 *svirt;
> +	};
> +	union {
> +		struct scatterlist *dst;
> +		u8 *dvirt;
> +	};
>  	unsigned int slen;
>  	unsigned int dlen;
> +
> +	struct acomp_req_chain chain;
> +
>  	void *__ctx[] CRYPTO_MINALIGN_ATTR;
>  };
> 
> @@ -200,10 +234,14 @@ static inline void
> acomp_request_set_callback(struct acomp_req *req,
>  					      crypto_completion_t cmpl,
>  					      void *data)
>  {
> +	u32 keep = CRYPTO_ACOMP_ALLOC_OUTPUT |
> CRYPTO_ACOMP_REQ_VIRT;
> +
>  	req->base.complete = cmpl;
>  	req->base.data = data;
> -	req->base.flags &= CRYPTO_ACOMP_ALLOC_OUTPUT;
> -	req->base.flags |= flgs & ~CRYPTO_ACOMP_ALLOC_OUTPUT;
> +	req->base.flags &= keep;
> +	req->base.flags |= flgs & ~keep;
> +
> +	crypto_reqchain_init(&req->base);
>  }
> 
>  /**
> @@ -230,11 +268,42 @@ static inline void
> acomp_request_set_params(struct acomp_req *req,
>  	req->slen = slen;
>  	req->dlen = dlen;
> 
> -	req->base.flags &= ~CRYPTO_ACOMP_ALLOC_OUTPUT;
> +	req->base.flags &= ~(CRYPTO_ACOMP_ALLOC_OUTPUT |
> CRYPTO_ACOMP_REQ_VIRT);
>  	if (!req->dst)
>  		req->base.flags |= CRYPTO_ACOMP_ALLOC_OUTPUT;
>  }
> 
> +/**
> + * acomp_request_set_virt() -- Sets virtual address request parameters
> + *
> + * Sets virtual address parameters required by an acomp operation
> + *
> + * @req:	asynchronous compress request
> + * @src:	virtual address pointer to input buffer
> + * @dst:	virtual address pointer to output buffer.
> + * @slen:	size of the input buffer
> + * @dlen:	size of the output buffer.
> + */
> +static inline void acomp_request_set_virt(struct acomp_req *req,
> +					  const u8 *src, u8 *dst,
> +					  unsigned int slen,
> +					  unsigned int dlen)
> +{
> +	req->svirt = src;
> +	req->dvirt = dst;
> +	req->slen = slen;
> +	req->dlen = dlen;
> +
> +	req->base.flags &= ~CRYPTO_ACOMP_ALLOC_OUTPUT;
> +	req->base.flags |= CRYPTO_ACOMP_REQ_VIRT;
> +}
> +
> +static inline void acomp_request_chain(struct acomp_req *req,
> +				       struct acomp_req *head)
> +{
> +	crypto_request_chain(&req->base, &head->base);
> +}
> +
>  /**
>   * crypto_acomp_compress() -- Invoke asynchronous compress operation
>   *
> @@ -244,10 +313,7 @@ static inline void acomp_request_set_params(struct
> acomp_req *req,
>   *
>   * Return:	zero on success; error code in case of error
>   */
> -static inline int crypto_acomp_compress(struct acomp_req *req)
> -{
> -	return crypto_acomp_reqtfm(req)->compress(req);
> -}
> +int crypto_acomp_compress(struct acomp_req *req);
> 
>  /**
>   * crypto_acomp_decompress() -- Invoke asynchronous decompress
> operation
> @@ -258,9 +324,6 @@ static inline int crypto_acomp_compress(struct
> acomp_req *req)
>   *
>   * Return:	zero on success; error code in case of error
>   */
> -static inline int crypto_acomp_decompress(struct acomp_req *req)
> -{
> -	return crypto_acomp_reqtfm(req)->decompress(req);
> -}
> +int crypto_acomp_decompress(struct acomp_req *req);
> 
>  #endif
> diff --git a/include/crypto/internal/acompress.h
> b/include/crypto/internal/acompress.h
> index 8831edaafc05..b3b48dea7f2f 100644
> --- a/include/crypto/internal/acompress.h
> +++ b/include/crypto/internal/acompress.h
> @@ -109,4 +109,26 @@ void crypto_unregister_acomp(struct acomp_alg
> *alg);
>  int crypto_register_acomps(struct acomp_alg *algs, int count);
>  void crypto_unregister_acomps(struct acomp_alg *algs, int count);
> 
> +static inline bool acomp_request_chained(struct acomp_req *req)
> +{
> +	return crypto_request_chained(&req->base);
> +}
> +
> +static inline bool acomp_request_isvirt(struct acomp_req *req)
> +{
> +	return req->base.flags & CRYPTO_ACOMP_REQ_VIRT;
> +}
> +
> +static inline bool acomp_request_isnondma(struct acomp_req *req)
> +{
> +	return (req->base.flags &
> +		(CRYPTO_ACOMP_REQ_NONDMA |
> CRYPTO_ACOMP_REQ_VIRT)) ==
> +	       (CRYPTO_ACOMP_REQ_NONDMA |
> CRYPTO_ACOMP_REQ_VIRT);
> +}
> +
> +static inline bool crypto_acomp_req_chain(struct crypto_acomp *tfm)
> +{
> +	return crypto_tfm_req_chain(&tfm->base);
> +}
> +
>  #endif
> --
> 2.39.5






[Index of Archives]     [Kernel]     [Gnu Classpath]     [Gnu Crypto]     [DM Crypt]     [Netfilter]     [Bugtraq]
  Powered by Linux