On Thu, Oct 26, 2023 at 02:36:44AM +0800, Jerry Shih wrote:
> diff --git a/arch/riscv/crypto/chacha-riscv64-glue.c b/arch/riscv/crypto/chacha-riscv64-glue.c
> new file mode 100644
> index 000000000000..72011949f705
> --- /dev/null
> +++ b/arch/riscv/crypto/chacha-riscv64-glue.c
> @@ -0,0 +1,120 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Port of the OpenSSL ChaCha20 implementation for RISC-V 64
> + *
> + * Copyright (C) 2023 SiFive, Inc.
> + * Author: Jerry Shih <jerry.shih@xxxxxxxxxx>
> + */
> +
> +#include <asm/simd.h>
> +#include <asm/vector.h>
> +#include <crypto/internal/chacha.h>
> +#include <crypto/internal/simd.h>
> +#include <crypto/internal/skcipher.h>
> +#include <linux/crypto.h>
> +#include <linux/module.h>
> +#include <linux/types.h>
> +
> +#define CHACHA_BLOCK_VALID_SIZE_MASK (~(CHACHA_BLOCK_SIZE - 1))
> +#define CHACHA_BLOCK_REMAINING_SIZE_MASK (CHACHA_BLOCK_SIZE - 1)
> +#define CHACHA_KEY_OFFSET 4
> +#define CHACHA_IV_OFFSET 12
> +
> +/* chacha20 using zvkb vector crypto extension */
> +void ChaCha20_ctr32_zvkb(u8 *out, const u8 *input, size_t len, const u32 *key,
> +			 const u32 *counter);
> +
> +static int chacha20_encrypt(struct skcipher_request *req)
> +{
> +	u32 state[CHACHA_STATE_WORDS];

This function doesn't need to create the whole state matrix on the stack, since
the underlying assembly function takes as input the key and counter, not the
state matrix.
I recommend something like the following:

diff --git a/arch/riscv/crypto/chacha-riscv64-glue.c b/arch/riscv/crypto/chacha-riscv64-glue.c
index df185d0663fcc..216b4cd9d1e01 100644
--- a/arch/riscv/crypto/chacha-riscv64-glue.c
+++ b/arch/riscv/crypto/chacha-riscv64-glue.c
@@ -16,45 +16,42 @@
 #include <linux/module.h>
 #include <linux/types.h>
 
-#define CHACHA_KEY_OFFSET 4
-#define CHACHA_IV_OFFSET 12
-
 /* chacha20 using zvkb vector crypto extension */
 asmlinkage void ChaCha20_ctr32_zvkb(u8 *out, const u8 *input, size_t len,
 				    const u32 *key, const u32 *counter);
 
 static int chacha20_encrypt(struct skcipher_request *req)
 {
-	u32 state[CHACHA_STATE_WORDS];
 	u8 block_buffer[CHACHA_BLOCK_SIZE];
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	const struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct skcipher_walk walk;
 	unsigned int nbytes;
 	unsigned int tail_bytes;
+	u32 iv[4];
 	int err;
 
-	chacha_init_generic(state, ctx->key, req->iv);
+	iv[0] = get_unaligned_le32(req->iv);
+	iv[1] = get_unaligned_le32(req->iv + 4);
+	iv[2] = get_unaligned_le32(req->iv + 8);
+	iv[3] = get_unaligned_le32(req->iv + 12);
 
 	err = skcipher_walk_virt(&walk, req, false);
 	while (walk.nbytes) {
-		nbytes = walk.nbytes & (~(CHACHA_BLOCK_SIZE - 1));
+		nbytes = walk.nbytes & ~(CHACHA_BLOCK_SIZE - 1);
 		tail_bytes = walk.nbytes & (CHACHA_BLOCK_SIZE - 1);
 		kernel_vector_begin();
 		if (nbytes) {
 			ChaCha20_ctr32_zvkb(walk.dst.virt.addr,
 					    walk.src.virt.addr, nbytes,
-					    state + CHACHA_KEY_OFFSET,
-					    state + CHACHA_IV_OFFSET);
-			state[CHACHA_IV_OFFSET] += nbytes / CHACHA_BLOCK_SIZE;
+					    ctx->key, iv);
+			iv[0] += nbytes / CHACHA_BLOCK_SIZE;
 		}
 		if (walk.nbytes == walk.total && tail_bytes > 0) {
 			memcpy(block_buffer, walk.src.virt.addr + nbytes,
 			       tail_bytes);
 			ChaCha20_ctr32_zvkb(block_buffer, block_buffer,
-					    CHACHA_BLOCK_SIZE,
-					    state + CHACHA_KEY_OFFSET,
-					    state + CHACHA_IV_OFFSET);
+					    CHACHA_BLOCK_SIZE, ctx->key, iv);
 			memcpy(walk.dst.virt.addr + nbytes, block_buffer,
 			       tail_bytes);
 			tail_bytes = 0;